From ef233db92aefcf652b1d4d67f015f58a540501a7 Mon Sep 17 00:00:00 2001
From: Adrien Grand
Date: Wed, 12 May 2021 16:22:39 +0200
Subject: [PATCH 1/4] LUCENE-9955: Reduced state of stored fields readers.

This removes most state from stored fields readers.
---
 ...Lucene50CompressingStoredFieldsReader.java | 32 ++++++-------------
 ...Lucene90CompressingStoredFieldsReader.java | 32 ++++++-------------
 2 files changed, 20 insertions(+), 44 deletions(-)

diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene50/compressing/Lucene50CompressingStoredFieldsReader.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene50/compressing/Lucene50CompressingStoredFieldsReader.java
index fdda870dddc8..69abff280218 100644
--- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene50/compressing/Lucene50CompressingStoredFieldsReader.java
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene50/compressing/Lucene50CompressingStoredFieldsReader.java
@@ -95,7 +95,6 @@ public final class Lucene50CompressingStoredFieldsReader extends StoredFieldsRea
   private final int chunkSize;
   private final int packedIntsVersion;
   private final CompressionMode compressionMode;
-  private final Decompressor decompressor;
   private final int numDocs;
   private final boolean merging;
   private final BlockState state;
@@ -114,12 +113,11 @@ private Lucene50CompressingStoredFieldsReader(
     this.chunkSize = reader.chunkSize;
     this.packedIntsVersion = reader.packedIntsVersion;
     this.compressionMode = reader.compressionMode;
-    this.decompressor = reader.decompressor.clone();
     this.numDocs = reader.numDocs;
     this.numDirtyChunks = reader.numDirtyChunks;
     this.numDirtyDocs = reader.numDirtyDocs;
     this.merging = merging;
-    this.state = new BlockState();
+    this.state = merging ? new BlockState() : null;
     this.closed = false;
   }
@@ -171,9 +169,8 @@ public Lucene50CompressingStoredFieldsReader(
       packedIntsVersion = fieldsStream.readVInt();
     }
-    decompressor = compressionMode.newDecompressor();
     this.merging = false;
-    this.state = new BlockState();
+    this.state = null;
     // NOTE: data file is too costly to verify checksum against all the bytes on open,
     // but for now we at least verify proper structure of the checksum footer: which looks
@@ -445,17 +442,10 @@ private class BlockState {
     // the start pointer at which you can read the compressed documents
     private long startPointer;
-    private final BytesRef spare;
-    private final BytesRef bytes;
+    private final BytesRef spare = new BytesRef();
+    private final BytesRef bytes = new BytesRef();
-    BlockState() {
-      if (merging) {
-        spare = new BytesRef();
-        bytes = new BytesRef();
-      } else {
-        spare = bytes = null;
-      }
-    }
+    private final Decompressor decompressor = compressionMode.newDecompressor();
     boolean contains(int docID) {
       return docID >= docBase && docID < docBase + chunkDocs;
@@ -609,13 +599,6 @@ SerializedDocument document(int docID) throws IOException {
       final int totalLength = Math.toIntExact(offsets[chunkDocs]);
       final int numStoredFields = Math.toIntExact(this.numStoredFields[index]);
-      final BytesRef bytes;
-      if (merging) {
-        bytes = this.bytes;
-      } else {
-        bytes = new BytesRef();
-      }
-
       final DataInput documentInput;
       if (length == 0) {
         // empty
@@ -690,6 +673,11 @@ public void skipBytes(long numBytes) throws IOException {
   }

   SerializedDocument document(int docID) throws IOException {
+    assert merging == (state != null);
+    BlockState state = this.state;
+    if (state == null) {
+      state = new BlockState();
+    }
     if (state.contains(docID) == false) {
       fieldsStream.seek(indexReader.getStartPointer(docID));
       state.reset(docID);
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingStoredFieldsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingStoredFieldsReader.java
index a52af2ef4d43..97e4c74ca713 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingStoredFieldsReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingStoredFieldsReader.java
@@ -78,7 +78,6 @@ public final class Lucene90CompressingStoredFieldsReader extends StoredFieldsRea
   private final IndexInput fieldsStream;
   private final int chunkSize;
   private final CompressionMode compressionMode;
-  private final Decompressor decompressor;
   private final int numDocs;
   private final boolean merging;
   private final BlockState state;
@@ -97,13 +96,12 @@ private Lucene90CompressingStoredFieldsReader(
     this.maxPointer = reader.maxPointer;
     this.chunkSize = reader.chunkSize;
     this.compressionMode = reader.compressionMode;
-    this.decompressor = reader.decompressor.clone();
     this.numDocs = reader.numDocs;
     this.numChunks = reader.numChunks;
     this.numDirtyChunks = reader.numDirtyChunks;
     this.numDirtyDocs = reader.numDirtyDocs;
     this.merging = merging;
-    this.state = new BlockState();
+    this.state = merging ? new BlockState() : null;
     this.closed = false;
   }
@@ -148,9 +146,8 @@ public Lucene90CompressingStoredFieldsReader(
     chunkSize = metaIn.readVInt();
-    decompressor = compressionMode.newDecompressor();
     this.merging = false;
-    this.state = new BlockState();
+    this.state = null;
     // NOTE: data file is too costly to verify checksum against all the bytes on open,
     // but for now we at least verify proper structure of the checksum footer: which looks
@@ -401,17 +398,10 @@ private class BlockState {
     // the start pointer at which you can read the compressed documents
     private long startPointer;
-    private final BytesRef spare;
-    private final BytesRef bytes;
+    private final BytesRef spare = new BytesRef();
+    private final BytesRef bytes = new BytesRef();
-    BlockState() {
-      if (merging) {
-        spare = new BytesRef();
-        bytes = new BytesRef();
-      } else {
-        spare = bytes = null;
-      }
-    }
+    private final Decompressor decompressor = compressionMode.newDecompressor();
     boolean contains(int docID) {
       return docID >= docBase && docID < docBase + chunkDocs;
@@ -522,13 +512,6 @@ SerializedDocument document(int docID) throws IOException {
       final int totalLength = Math.toIntExact(offsets[chunkDocs]);
       final int numStoredFields = Math.toIntExact(this.numStoredFields[index]);
-      final BytesRef bytes;
-      if (merging) {
-        bytes = this.bytes;
-      } else {
-        bytes = new BytesRef();
-      }
-
       final DataInput documentInput;
       if (length == 0) {
         // empty
@@ -602,6 +585,11 @@ public void skipBytes(long numBytes) throws IOException {
   }

   SerializedDocument document(int docID) throws IOException {
+    assert merging == (state != null);
+    BlockState state = this.state;
+    if (state == null) {
+      state = new BlockState();
+    }
     if (state.contains(docID) == false) {
       fieldsStream.seek(indexReader.getStartPointer(docID));
       state.reset(docID);

From 5f7f234564516e746922c73c603cdc9f1ccdd77e Mon Sep 17 00:00:00 2001
From: Adrien Grand
Date: Wed, 12 May 2021 16:43:48 +0200
Subject: [PATCH 2/4] Handle term vectors too and remove Cloneable which isn't used anymore.

---
 .../compressing/Lucene50CompressingTermVectorsReader.java | 4 +---
 .../org/apache/lucene/codecs/compressing/Decompressor.java | 4 +---
 .../compressing/Lucene90CompressingTermVectorsReader.java | 4 +---
 3 files changed, 3 insertions(+), 9 deletions(-)

diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene50/compressing/Lucene50CompressingTermVectorsReader.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene50/compressing/Lucene50CompressingTermVectorsReader.java
index fcd9756137b1..32f71034d3ae 100644
--- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene50/compressing/Lucene50CompressingTermVectorsReader.java
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene50/compressing/Lucene50CompressingTermVectorsReader.java
@@ -86,7 +86,6 @@ public final class Lucene50CompressingTermVectorsReader extends TermVectorsReade
   private final int version;
   private final int packedIntsVersion;
   private final CompressionMode compressionMode;
-  private final Decompressor decompressor;
   private final int chunkSize;
   private final int numDocs;
   private boolean closed;
@@ -102,7 +101,6 @@ private Lucene50CompressingTermVectorsReader(Lucene50CompressingTermVectorsReade
     this.indexReader = reader.indexReader.clone();
     this.packedIntsVersion = reader.packedIntsVersion;
     this.compressionMode = reader.compressionMode;
-    this.decompressor = reader.decompressor.clone();
     this.chunkSize = reader.chunkSize;
     this.numDocs = reader.numDocs;
     this.reader =
@@ -235,7 +233,6 @@ public Lucene50CompressingTermVectorsReader(
       numDirtyChunks = numDirtyDocs = -1;
     }
-    decompressor = compressionMode.newDecompressor();
     this.reader =
         new BlockPackedReaderIterator(vectorsStream, packedIntsVersion, PACKED_BLOCK_SIZE, 0);
@@ -689,6 +686,7 @@ public Fields get(int doc) throws IOException {
     // decompress data
     final BytesRef suffixBytes = new BytesRef();
+    final Decompressor decompressor = compressionMode.newDecompressor();
     decompressor.decompress(
         vectorsStream,
         totalLen + totalPayloadLength,
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/compressing/Decompressor.java b/lucene/core/src/java/org/apache/lucene/codecs/compressing/Decompressor.java
index fff2108a42be..8b2895df9d24 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/compressing/Decompressor.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/compressing/Decompressor.java
@@ -21,7 +21,7 @@ import org.apache.lucene.util.BytesRef;

 /** A decompressor. */
-public abstract class Decompressor implements Cloneable {
+public abstract class Decompressor {

   /** Sole constructor, typically called from sub-classes. */
   protected Decompressor() {}
@@ -42,6 +42,4 @@ protected Decompressor() {}
   public abstract void decompress(
       DataInput in, int originalLength, int offset, int length, BytesRef bytes) throws IOException;

-  @Override
-  public abstract Decompressor clone();
 }
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingTermVectorsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingTermVectorsReader.java
index ec8823d70dea..573912af9171 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingTermVectorsReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingTermVectorsReader.java
@@ -82,7 +82,6 @@ public final class Lucene90CompressingTermVectorsReader extends TermVectorsReade
   private final int version;
   private final int packedIntsVersion;
   private final CompressionMode compressionMode;
-  private final Decompressor decompressor;
   private final int chunkSize;
   private final int numDocs;
   private boolean closed;
@@ -99,7 +98,6 @@ private Lucene90CompressingTermVectorsReader(Lucene90CompressingTermVectorsReade
     this.indexReader = reader.indexReader.clone();
     this.packedIntsVersion = reader.packedIntsVersion;
     this.compressionMode = reader.compressionMode;
-    this.decompressor = reader.decompressor.clone();
     this.chunkSize = reader.chunkSize;
     this.numDocs = reader.numDocs;
     this.reader =
@@ -202,7 +200,6 @@ public Lucene90CompressingTermVectorsReader(
           metaIn);
     }
-    decompressor = compressionMode.newDecompressor();
     this.reader =
         new BlockPackedReaderIterator(vectorsStream, packedIntsVersion, PACKED_BLOCK_SIZE, 0);
@@ -651,6 +648,7 @@ public Fields get(int doc) throws IOException {
     // decompress data
     final BytesRef suffixBytes = new BytesRef();
+    final Decompressor decompressor = compressionMode.newDecompressor();
     decompressor.decompress(
         vectorsStream,
         totalLen + totalPayloadLength,

From a668b7722b6712c0025b29ae360be12d983ad009 Mon Sep 17 00:00:00 2001
From: Adrien Grand
Date: Wed, 12 May 2021 16:46:18 +0200
Subject: [PATCH 3/4] Spotless.

---
 .../java/org/apache/lucene/codecs/compressing/Decompressor.java | 1 -
 1 file changed, 1 deletion(-)

diff --git a/lucene/core/src/java/org/apache/lucene/codecs/compressing/Decompressor.java b/lucene/core/src/java/org/apache/lucene/codecs/compressing/Decompressor.java
index 8b2895df9d24..2217c28d2d2e 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/compressing/Decompressor.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/compressing/Decompressor.java
@@ -41,5 +41,4 @@ protected Decompressor() {}
    */
   public abstract void decompress(
       DataInput in, int originalLength, int offset, int length, BytesRef bytes) throws IOException;
-
 }

From ed631f24bbd0ecc467210cdae39870b9b5573a94 Mon Sep 17 00:00:00 2001
From: Adrien Grand
Date: Fri, 14 May 2021 09:15:10 +0200
Subject: [PATCH 4/4] Remove clone impls.

---
 .../lucene87/DeflateWithPresetDictCompressionMode.java | 5 -----
 .../lucene87/LZ4WithPresetDictCompressionMode.java | 5 -----
 .../lucene/codecs/compressing/CompressionMode.java | 10 ----------
 .../lucene90/DeflateWithPresetDictCompressionMode.java | 5 -----
 .../lucene90/LZ4WithPresetDictCompressionMode.java | 5 -----
 .../lucene/index/SortingStoredFieldsConsumer.java | 5 -----
 .../compressing/dummy/DummyCompressingCodec.java | 5 -----
 7 files changed, 40 deletions(-)

diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene87/DeflateWithPresetDictCompressionMode.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene87/DeflateWithPresetDictCompressionMode.java
index 09f539ef0b50..cebb80be92fd 100644
--- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene87/DeflateWithPresetDictCompressionMode.java
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene87/DeflateWithPresetDictCompressionMode.java
@@ -151,11 +151,6 @@ public void decompress(DataInput in, int originalLength, int offset, int length,
         decompressor.end();
       }
     }
-
-    @Override
-    public Decompressor clone() {
-      return new DeflateWithPresetDictDecompressor();
-    }
   }

   private static class DeflateWithPresetDictCompressor extends Compressor {
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene87/LZ4WithPresetDictCompressionMode.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene87/LZ4WithPresetDictCompressionMode.java
index 5efb49b6966f..c5e85fd8c8eb 100644
--- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene87/LZ4WithPresetDictCompressionMode.java
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene87/LZ4WithPresetDictCompressionMode.java
@@ -139,11 +139,6 @@ public void decompress(DataInput in, int originalLength, int offset, int length,
       bytes.length = length;
       assert bytes.isValid();
     }
-
-    @Override
-    public Decompressor clone() {
-      return new LZ4WithPresetDictDecompressor();
-    }
   }

   private static class LZ4WithPresetDictCompressor extends Compressor {
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressionMode.java b/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressionMode.java
index 89d2a6d8eb47..0ff079e0e3ae 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressionMode.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressionMode.java
@@ -139,11 +139,6 @@ public void decompress(
           bytes.offset = offset;
           bytes.length = length;
         }
-
-        @Override
-        public Decompressor clone() {
-          return this;
-        }
       };

   private static final class LZ4FastCompressor extends Compressor {
@@ -238,11 +233,6 @@ public void decompress(DataInput in, int originalLength, int offset, int length,
       bytes.offset = offset;
       bytes.length = length;
     }
-
-    @Override
-    public Decompressor clone() {
-      return new DeflateDecompressor();
-    }
   }

   private static class DeflateCompressor extends Compressor {
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/DeflateWithPresetDictCompressionMode.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/DeflateWithPresetDictCompressionMode.java
index acd9a10df70d..988d359d79e0 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/DeflateWithPresetDictCompressionMode.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/DeflateWithPresetDictCompressionMode.java
@@ -151,11 +151,6 @@ public void decompress(DataInput in, int originalLength, int offset, int length,
         decompressor.end();
       }
     }
-
-    @Override
-    public Decompressor clone() {
-      return new DeflateWithPresetDictDecompressor();
-    }
   }

   private static class DeflateWithPresetDictCompressor extends Compressor {
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/LZ4WithPresetDictCompressionMode.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/LZ4WithPresetDictCompressionMode.java
index 4506514f13a3..70b3887e988a 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/LZ4WithPresetDictCompressionMode.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/LZ4WithPresetDictCompressionMode.java
@@ -139,11 +139,6 @@ public void decompress(DataInput in, int originalLength, int offset, int length,
       bytes.length = length;
       assert bytes.isValid();
     }
-
-    @Override
-    public Decompressor clone() {
-      return new LZ4WithPresetDictDecompressor();
-    }
   }

   private static class LZ4WithPresetDictCompressor extends Compressor {
diff --git a/lucene/core/src/java/org/apache/lucene/index/SortingStoredFieldsConsumer.java b/lucene/core/src/java/org/apache/lucene/index/SortingStoredFieldsConsumer.java
index 6e7bc41622dd..ddfce1308ed2 100644
--- a/lucene/core/src/java/org/apache/lucene/index/SortingStoredFieldsConsumer.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SortingStoredFieldsConsumer.java
@@ -70,11 +70,6 @@ public void decompress(
             bytes.offset = 0;
             bytes.length = length;
           }
-
-          @Override
-          public Decompressor clone() {
-            return this;
-          }
         };
       }
     };
diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/dummy/DummyCompressingCodec.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/dummy/DummyCompressingCodec.java
index 1d838ee2d016..ecb7e9634270 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/dummy/DummyCompressingCodec.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/dummy/DummyCompressingCodec.java
@@ -65,11 +65,6 @@ public void decompress(
           bytes.offset = offset;
           bytes.length = length;
         }
-
-        @Override
-        public Decompressor clone() {
-          return this;
-        }
       };

   private static final Compressor DUMMY_COMPRESSOR =
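
The sketch below is an editorial illustration appended after the patch series; it is not part of it. Using hypothetical names (SimpleReader, Block) as simplified stand-ins for Lucene90CompressingStoredFieldsReader and its BlockState, it shows the shape the diffs above converge on: the reader itself holds no decode state (no Decompressor, no scratch BytesRef buffers); only a merge reader, which visits documents in order, caches a single block, while a regular reader builds a short-lived block per document() call, much as the patched term-vector readers now create a Decompressor inside get(doc) instead of cloning a shared one.

// Editorial illustration; NOT part of the patch series. SimpleReader and Block are
// hypothetical stand-ins; the real classes seek a fields stream and decompress chunks,
// which is omitted here.
import java.util.concurrent.atomic.AtomicInteger;

public class SimpleReader {

  /** Hypothetical stand-in for BlockState: scratch state for decoding one block. */
  static final class Block {
    static final AtomicInteger ALLOCATED = new AtomicInteger();
    int docBase = -1;
    int docCount = 0;

    Block() {
      ALLOCATED.incrementAndGet(); // counted only so main() can show the difference
    }

    boolean contains(int docID) {
      return docID >= docBase && docID < docBase + docCount;
    }

    void reset(int docID) {
      // A real reader would seek the fields stream and decode the chunk header here.
      docBase = (docID / 10) * 10;
      docCount = 10;
    }
  }

  private final boolean merging;
  // Cached only for merge readers, which visit documents in order and so benefit from
  // reusing one block across calls; search-time readers keep no decode state at all.
  private final Block cached;

  SimpleReader(boolean merging) {
    this.merging = merging;
    this.cached = merging ? new Block() : null;
  }

  String document(int docID) {
    // Same shape as the patched document(int docID): reuse the cached block when
    // merging, otherwise build a short-lived block for this call only.
    assert merging == (cached != null);
    Block block = cached != null ? cached : new Block();
    if (block.contains(docID) == false) {
      block.reset(docID);
    }
    return "doc " + docID + " from block at " + block.docBase;
  }

  public static void main(String[] args) {
    SimpleReader searchReader = new SimpleReader(false);
    searchReader.document(3);
    searchReader.document(4); // a fresh Block per call, nothing retained afterwards

    SimpleReader mergeReader = new SimpleReader(true);
    mergeReader.document(3);
    mergeReader.document(4); // the single cached Block is reused

    System.out.println("blocks allocated: " + Block.ALLOCATED.get()); // prints 3
  }
}

As far as the diffs show, the trade-off is that an open non-merging reader no longer pins a Decompressor and scratch buffers for its whole lifetime; it pays a small allocation per document() call instead, while the sequential merge path keeps the reuse where it actually matters.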