From 6cfe308cd7f2f62a51ab13c8193fae8cd199a8e0 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Thu, 16 Apr 2020 12:07:55 +0200 Subject: [PATCH 01/15] LUCENE-9324: Add an ID to SegmentCommitInfo --- .../index/DocumentsWriterPerThread.java | 2 +- .../org/apache/lucene/index/IndexWriter.java | 6 +-- .../lucene/index/SegmentCommitInfo.java | 41 ++++++++++++++----- .../org/apache/lucene/index/SegmentInfos.java | 23 +++++++++-- .../test/org/apache/lucene/index/TestDoc.java | 2 +- .../TestIndexWriterThreadsToSegments.java | 2 +- .../TestOneMergeWrappingMergePolicy.java | 2 +- .../lucene/index/TestPendingDeletes.java | 6 +-- .../lucene/index/TestPendingSoftDeletes.java | 2 +- .../apache/lucene/index/TestSegmentInfos.java | 40 ++++++++++++++++-- .../lucene/index/TestSegmentMerger.java | 2 +- 11 files changed, 98 insertions(+), 30 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java b/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java index fdcd7ee42c85..1286c765fbb4 100644 --- a/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java +++ b/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java @@ -419,7 +419,7 @@ FlushedSegment flush(DocumentsWriter.FlushNotifications flushNotifications) thro pendingUpdates.clearDeleteTerms(); segmentInfo.setFiles(new HashSet<>(directory.getCreatedFiles())); - final SegmentCommitInfo segmentInfoPerCommit = new SegmentCommitInfo(segmentInfo, 0, flushState.softDelCountOnFlush, -1L, -1L, -1L); + final SegmentCommitInfo segmentInfoPerCommit = new SegmentCommitInfo(segmentInfo, 0, flushState.softDelCountOnFlush, -1L, -1L, -1L, StringHelper.randomId()); if (infoStream.isEnabled("DWPT")) { infoStream.message("DWPT", "new segment has " + (flushState.liveDocs == null ? 0 : flushState.delCountOnFlush) + " deleted docs"); infoStream.message("DWPT", "new segment has " + flushState.softDelCountOnFlush + " soft-deleted docs"); diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java index 99ee5cba615f..64fe46825f27 100644 --- a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java @@ -3003,7 +3003,7 @@ public long addIndexes(CodecReader... readers) throws IOException { notifyAll(); } } - SegmentCommitInfo infoPerCommit = new SegmentCommitInfo(info, 0, numSoftDeleted, -1L, -1L, -1L); + SegmentCommitInfo infoPerCommit = new SegmentCommitInfo(info, 0, numSoftDeleted, -1L, -1L, -1L, StringHelper.randomId()); info.setFiles(new HashSet<>(trackingDir.getCreatedFiles())); trackingDir.clearCreatedFiles(); @@ -3081,7 +3081,7 @@ private SegmentCommitInfo copySegmentAsIs(SegmentCommitInfo info, String segName info.info.getUseCompoundFile(), info.info.getCodec(), info.info.getDiagnostics(), info.info.getId(), info.info.getAttributes(), info.info.getIndexSort()); SegmentCommitInfo newInfoPerCommit = new SegmentCommitInfo(newInfo, info.getDelCount(), info.getSoftDelCount(), info.getDelGen(), - info.getFieldInfosGen(), info.getDocValuesGen()); + info.getFieldInfosGen(), info.getDocValuesGen(), info.getId()); newInfo.setFiles(info.info.files()); newInfoPerCommit.setFieldInfosFiles(info.getFieldInfosFiles()); @@ -4273,7 +4273,7 @@ synchronized private void _mergeInit(MergePolicy.OneMerge merge) throws IOExcept details.put("mergeMaxNumSegments", "" + merge.maxNumSegments); details.put("mergeFactor", Integer.toString(merge.segments.size())); setDiagnostics(si, SOURCE_MERGE, details); - merge.setMergeInfo(new SegmentCommitInfo(si, 0, 0, -1L, -1L, -1L)); + merge.setMergeInfo(new SegmentCommitInfo(si, 0, 0, -1L, -1L, -1L, StringHelper.randomId())); if (infoStream.isEnabled("IW")) { infoStream.message("IW", "merge seg=" + merge.info.info.name + " " + segString(merge.segments)); diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentCommitInfo.java b/lucene/core/src/java/org/apache/lucene/index/SegmentCommitInfo.java index 69117574698b..f6c634fe2785 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SegmentCommitInfo.java +++ b/lucene/core/src/java/org/apache/lucene/index/SegmentCommitInfo.java @@ -18,6 +18,7 @@ import java.io.IOException; +import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.HashMap; @@ -26,6 +27,8 @@ import java.util.Map.Entry; import java.util.Set; +import org.apache.lucene.util.StringHelper; + /** Embeds a [read-only] SegmentInfo and adds per-commit * fields. * @@ -35,6 +38,9 @@ public class SegmentCommitInfo { /** The {@link SegmentInfo} that we wrap. */ public final SegmentInfo info; + /** Id that uniquely identifies this segment commit. */ + private byte[] id; + // How many deleted docs in the segment: private int delCount; @@ -79,19 +85,18 @@ public class SegmentCommitInfo { /** * Sole constructor. - * - * @param info + * @param info * {@link SegmentInfo} that we wrap * @param delCount * number of deleted documents in this segment * @param delGen - * deletion generation number (used to name deletion files) + * deletion generation number (used to name deletion files) * @param fieldInfosGen - * FieldInfos generation number (used to name field-infos files) +* FieldInfos generation number (used to name field-infos files) * @param docValuesGen - * DocValues generation number (used to name doc-values updates files) + * @param id Id that uniquely identifies this segment commit. This id must be 16 bytes long. See {@link StringHelper#randomId()} */ - public SegmentCommitInfo(SegmentInfo info, int delCount, int softDelCount, long delGen, long fieldInfosGen, long docValuesGen) { + public SegmentCommitInfo(SegmentInfo info, int delCount, int softDelCount, long delGen, long fieldInfosGen, long docValuesGen, byte[] id) { this.info = info; this.delCount = delCount; this.softDelCount = softDelCount; @@ -101,6 +106,10 @@ public SegmentCommitInfo(SegmentInfo info, int delCount, int softDelCount, long this.nextWriteFieldInfosGen = fieldInfosGen == -1 ? 1 : fieldInfosGen + 1; this.docValuesGen = docValuesGen; this.nextWriteDocValuesGen = docValuesGen == -1 ? 1 : docValuesGen + 1; + this.id = id; + if (id.length != StringHelper.ID_LENGTH) { + throw new IllegalArgumentException("invalid id: " + Arrays.toString(id)); + } } /** Returns the per-field DocValues updates files. */ @@ -138,7 +147,7 @@ public void setFieldInfosFiles(Set fieldInfosFiles) { void advanceDelGen() { delGen = nextWriteDelGen; nextWriteDelGen = delGen+1; - sizeInBytes = -1; + generationAdvanced(); } /** Called if there was an exception while writing @@ -162,7 +171,7 @@ void setNextWriteDelGen(long v) { void advanceFieldInfosGen() { fieldInfosGen = nextWriteFieldInfosGen; nextWriteFieldInfosGen = fieldInfosGen + 1; - sizeInBytes = -1; + generationAdvanced(); } /** @@ -187,7 +196,7 @@ void setNextWriteFieldInfosGen(long v) { void advanceDocValuesGen() { docValuesGen = nextWriteDocValuesGen; nextWriteDocValuesGen = docValuesGen + 1; - sizeInBytes = -1; + generationAdvanced(); } /** @@ -251,7 +260,7 @@ long getBufferedDeletesGen() { void setBufferedDeletesGen(long v) { if (bufferedDeletesGen == -1) { bufferedDeletesGen = v; - sizeInBytes = -1; + generationAdvanced(); } else { throw new IllegalStateException("buffered deletes gen should only be set once"); } @@ -355,6 +364,7 @@ public String toString(int pendingDelCount) { if (softDelCount > 0) { s += " :softDel=" + softDelCount; } + s += " :id=" + StringHelper.idToString(id); return s; } @@ -366,7 +376,7 @@ public String toString() { @Override public SegmentCommitInfo clone() { - SegmentCommitInfo other = new SegmentCommitInfo(info, delCount, softDelCount, delGen, fieldInfosGen, docValuesGen); + SegmentCommitInfo other = new SegmentCommitInfo(info, delCount, softDelCount, delGen, fieldInfosGen, docValuesGen, id.clone()); // Not clear that we need to carry over nextWriteDelGen // (i.e. do we ever clone after a failed write and // before the next successful write?), but just do it to @@ -388,4 +398,13 @@ public SegmentCommitInfo clone() { final int getDelCount(boolean includeSoftDeletes) { return includeSoftDeletes ? getDelCount() + getSoftDelCount() : getDelCount(); } + + private void generationAdvanced() { + sizeInBytes = -1; + id = StringHelper.randomId(); + } + + public byte[] getId() { + return id.clone(); + } } diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java b/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java index 116c2e11435e..2e94c8aed435 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java +++ b/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java @@ -124,7 +124,9 @@ public final class SegmentInfos implements Cloneable, Iterable info.maxDoc()) { throw new CorruptIndexException("invalid deletion count: " + softDelCount + delCount + " vs maxDoc=" + info.maxDoc(), input); } - SegmentCommitInfo siPerCommit = new SegmentCommitInfo(info, delCount, softDelCount, delGen, fieldInfosGen, dvGen); + final byte[] sciId; + if (format > VERSION_74) { + sciId = new byte[StringHelper.ID_LENGTH]; + input.readBytes(sciId, 0, sciId.length); + } else { + sciId = infos.id; + // NOCOMMIT can we do this? it would at least give us consistent BWC but we can't identify the same SCI in different commits + } + SegmentCommitInfo siPerCommit = new SegmentCommitInfo(info, delCount, softDelCount, delGen, fieldInfosGen, dvGen, sciId); siPerCommit.setFieldInfosFiles(input.readSetOfStrings()); final Map> dvUpdateFiles; final int numDVFields = input.readInt(); @@ -460,7 +470,7 @@ private void write(Directory directory) throws IOException { try { segnOutput = directory.createOutput(segmentFileName, IOContext.DEFAULT); - write(directory, segnOutput); + write(segnOutput); segnOutput.close(); directory.sync(Collections.singleton(segmentFileName)); success = true; @@ -479,7 +489,7 @@ private void write(Directory directory) throws IOException { } /** Write ourselves to the provided {@link IndexOutput} */ - public void write(Directory directory, IndexOutput out) throws IOException { + public void write(IndexOutput out) throws IOException { CodecUtil.writeIndexHeader(out, "segments", VERSION_CURRENT, StringHelper.randomId(), Long.toString(generation, Character.MAX_RADIX)); out.writeVInt(Version.LATEST.major); @@ -537,6 +547,11 @@ public void write(Directory directory, IndexOutput out) throws IOException { throw new IllegalStateException("cannot write segment: invalid maxDoc segment=" + si.name + " maxDoc=" + si.maxDoc() + " softDelCount=" + softDelCount); } out.writeInt(softDelCount); + byte[] sciId = siPerCommit.getId(); + if (sciId.length != StringHelper.ID_LENGTH) { + throw new IllegalArgumentException("invalid SegmentCommitInfo#id: " + Arrays.toString(sciId)); + } + out.writeBytes(sciId, 0, sciId.length); out.writeSetOfStrings(siPerCommit.getFieldInfosFiles()); final Map> dvUpdatesFiles = siPerCommit.getDocValuesUpdatesFiles(); out.writeInt(dvUpdatesFiles.size()); diff --git a/lucene/core/src/test/org/apache/lucene/index/TestDoc.java b/lucene/core/src/test/org/apache/lucene/index/TestDoc.java index d7eea7a1ad3b..f7760704e1fa 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestDoc.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestDoc.java @@ -238,7 +238,7 @@ private SegmentCommitInfo merge(Directory dir, SegmentCommitInfo si1, SegmentCom } } - return new SegmentCommitInfo(si, 0, 0, -1L, -1L, -1L); + return new SegmentCommitInfo(si, 0, 0, -1L, -1L, -1L, StringHelper.randomId()); } diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterThreadsToSegments.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterThreadsToSegments.java index 347cb7d22354..b14a887ae519 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterThreadsToSegments.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterThreadsToSegments.java @@ -332,7 +332,7 @@ public void run() { byte id[] = readSegmentInfoID(dir, fileName); SegmentInfo si = TestUtil.getDefaultCodec().segmentInfoFormat().read(dir, segName, id, IOContext.DEFAULT); si.setCodec(codec); - SegmentCommitInfo sci = new SegmentCommitInfo(si, 0, 0, -1, -1, -1); + SegmentCommitInfo sci = new SegmentCommitInfo(si, 0, 0, -1, -1, -1, StringHelper.randomId()); SegmentReader sr = new SegmentReader(sci, Version.LATEST.major, IOContext.DEFAULT); try { thread0Count += sr.docFreq(new Term("field", "threadID0")); diff --git a/lucene/core/src/test/org/apache/lucene/index/TestOneMergeWrappingMergePolicy.java b/lucene/core/src/test/org/apache/lucene/index/TestOneMergeWrappingMergePolicy.java index e240f549ecdf..e0206ccc72d1 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestOneMergeWrappingMergePolicy.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestOneMergeWrappingMergePolicy.java @@ -137,7 +137,7 @@ private static MergePolicy.MergeSpecification createRandomMergeSpecification(Dir Collections.emptyMap(), // attributes null /* indexSort */); final List segments = new LinkedList(); - segments.add(new SegmentCommitInfo(si, 0, 0, 0, 0, 0)); + segments.add(new SegmentCommitInfo(si, 0, 0, 0, 0, 0, StringHelper.randomId())); ms.add(new MergePolicy.OneMerge(segments)); } } diff --git a/lucene/core/src/test/org/apache/lucene/index/TestPendingDeletes.java b/lucene/core/src/test/org/apache/lucene/index/TestPendingDeletes.java index 143f671990b2..841ebe1f969a 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestPendingDeletes.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestPendingDeletes.java @@ -41,7 +41,7 @@ public void testDeleteDoc() throws IOException { Directory dir = new ByteBuffersDirectory(); SegmentInfo si = new SegmentInfo(dir, Version.LATEST, Version.LATEST, "test", 10, false, Codec.getDefault(), Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null); - SegmentCommitInfo commitInfo = new SegmentCommitInfo(si, 0, 0, -1, -1, -1); + SegmentCommitInfo commitInfo = new SegmentCommitInfo(si, 0, 0, -1, -1, -1, StringHelper.randomId()); PendingDeletes deletes = newPendingDeletes(commitInfo); assertNull(deletes.getLiveDocs()); int docToDelete = TestUtil.nextInt(random(), 0, 7); @@ -75,7 +75,7 @@ public void testWriteLiveDocs() throws IOException { Directory dir = new ByteBuffersDirectory(); SegmentInfo si = new SegmentInfo(dir, Version.LATEST, Version.LATEST, "test", 6, false, Codec.getDefault(), Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null); - SegmentCommitInfo commitInfo = new SegmentCommitInfo(si, 0, 0, -1, -1, -1); + SegmentCommitInfo commitInfo = new SegmentCommitInfo(si, 0, 0, -1, -1, -1, StringHelper.randomId()); PendingDeletes deletes = newPendingDeletes(commitInfo); assertFalse(deletes.writeLiveDocs(dir)); assertEquals(0, dir.listAll().length); @@ -132,7 +132,7 @@ public void testIsFullyDeleted() throws IOException { Directory dir = new ByteBuffersDirectory(); SegmentInfo si = new SegmentInfo(dir, Version.LATEST, Version.LATEST, "test", 3, false, Codec.getDefault(), Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null); - SegmentCommitInfo commitInfo = new SegmentCommitInfo(si, 0, 0, -1, -1, -1); + SegmentCommitInfo commitInfo = new SegmentCommitInfo(si, 0, 0, -1, -1, -1, StringHelper.randomId()); FieldInfos fieldInfos = FieldInfos.EMPTY; si.getCodec().fieldInfosFormat().write(dir, si, "", fieldInfos, IOContext.DEFAULT); PendingDeletes deletes = newPendingDeletes(commitInfo); diff --git a/lucene/core/src/test/org/apache/lucene/index/TestPendingSoftDeletes.java b/lucene/core/src/test/org/apache/lucene/index/TestPendingSoftDeletes.java index 666b3c4052e1..a7c681189042 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestPendingSoftDeletes.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestPendingSoftDeletes.java @@ -150,7 +150,7 @@ public void testApplyUpdates() throws IOException { Directory dir = new ByteBuffersDirectory(); SegmentInfo si = new SegmentInfo(dir, Version.LATEST, Version.LATEST, "test", 10, false, Codec.getDefault(), Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null); - SegmentCommitInfo commitInfo = new SegmentCommitInfo(si, 0, 0, -1, -1, -1); + SegmentCommitInfo commitInfo = new SegmentCommitInfo(si, 0, 0, -1, -1, -1, StringHelper.randomId()); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig()); for (int i = 0; i < si.maxDoc(); i++) { writer.addDocument(new Document()); diff --git a/lucene/core/src/test/org/apache/lucene/index/TestSegmentInfos.java b/lucene/core/src/test/org/apache/lucene/index/TestSegmentInfos.java index f5029b84d83c..abf1df1ecab6 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestSegmentInfos.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestSegmentInfos.java @@ -27,6 +27,7 @@ import org.apache.lucene.util.Version; import java.io.IOException; +import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.Map; @@ -64,7 +65,7 @@ public void testVersionsOneSegment() throws IOException { Collections.emptyMap(), id, Collections.emptyMap(), null); info.setFiles(Collections.emptySet()); codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT); - SegmentCommitInfo commitInfo = new SegmentCommitInfo(info, 0, 0, -1, -1, -1); + SegmentCommitInfo commitInfo = new SegmentCommitInfo(info, 0, 0, -1, -1, -1, StringHelper.randomId()); sis.add(commitInfo); sis.commit(dir); @@ -86,20 +87,24 @@ public void testVersionsTwoSegments() throws IOException { Collections.emptyMap(), id, Collections.emptyMap(), null); info.setFiles(Collections.emptySet()); codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT); - SegmentCommitInfo commitInfo = new SegmentCommitInfo(info, 0, 0, -1, -1, -1); + SegmentCommitInfo commitInfo = new SegmentCommitInfo(info, 0, 0, -1, -1, -1, StringHelper.randomId()); sis.add(commitInfo); info = new SegmentInfo(dir, Version.LUCENE_9_0_0, Version.LUCENE_9_0_0, "_1", 1, false, Codec.getDefault(), Collections.emptyMap(), id, Collections.emptyMap(), null); info.setFiles(Collections.emptySet()); codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT); - commitInfo = new SegmentCommitInfo(info, 0, 0,-1, -1, -1); + commitInfo = new SegmentCommitInfo(info, 0, 0,-1, -1, -1, StringHelper.randomId()); sis.add(commitInfo); sis.commit(dir); + byte[] commitInfoId0 = sis.info(0).getId(); + byte[] commitInfoId1 = sis.info(1).getId(); sis = SegmentInfos.readLatestCommit(dir); assertEquals(Version.LUCENE_9_0_0, sis.getMinSegmentLuceneVersion()); assertEquals(Version.LATEST, sis.getCommitLuceneVersion()); + assertEquals(StringHelper.idToString(commitInfoId0), StringHelper.idToString(sis.info(0).getId())); + assertEquals(StringHelper.idToString(commitInfoId1), StringHelper.idToString(sis.info(1).getId())); dir.close(); } @@ -145,5 +150,34 @@ public void testToString() throws Throwable{ dir.close(); } + + public void testIDChangesOnAdvance() throws IOException { + try (BaseDirectoryWrapper dir = newDirectory()) { + dir.setCheckIndexOnClose(false); + byte id[] = StringHelper.randomId(); + SegmentInfo info = new SegmentInfo(dir, Version.LUCENE_9_0_0, Version.LUCENE_9_0_0, "_0", 1, false, Codec.getDefault(), + Collections.emptyMap(), StringHelper.randomId(), Collections.emptyMap(), null); + SegmentCommitInfo commitInfo = new SegmentCommitInfo(info, 0, 0, -1, -1, -1, id); + assertEquals(StringHelper.idToString(id), StringHelper.idToString(commitInfo.getId())); + commitInfo.advanceDelGen(); + assertNotEquals(StringHelper.idToString(id), StringHelper.idToString(commitInfo.getId())); + + id = commitInfo.getId(); + commitInfo.advanceDocValuesGen(); + assertNotEquals(StringHelper.idToString(id), StringHelper.idToString(commitInfo.getId())); + + id = commitInfo.getId(); + commitInfo.advanceFieldInfosGen(); + assertNotEquals(StringHelper.idToString(id), StringHelper.idToString(commitInfo.getId())); + SegmentCommitInfo clone = commitInfo.clone(); + id = commitInfo.getId(); + assertEquals(StringHelper.idToString(id), StringHelper.idToString(commitInfo.getId())); + assertEquals(StringHelper.idToString(id), StringHelper.idToString(clone.getId())); + + commitInfo.advanceFieldInfosGen(); + assertNotEquals(StringHelper.idToString(id), StringHelper.idToString(commitInfo.getId())); + assertEquals("clone changed but shouldn't", StringHelper.idToString(id), StringHelper.idToString(clone.getId())); + } + } } diff --git a/lucene/core/src/test/org/apache/lucene/index/TestSegmentMerger.java b/lucene/core/src/test/org/apache/lucene/index/TestSegmentMerger.java index 34f62c62bc3e..04f357e307e8 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestSegmentMerger.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestSegmentMerger.java @@ -96,7 +96,7 @@ public void testMerge() throws IOException { //Should be able to open a new SegmentReader against the new directory SegmentReader mergedReader = new SegmentReader(new SegmentCommitInfo( mergeState.segmentInfo, - 0, 0, -1L, -1L, -1L), + 0, 0, -1L, -1L, -1L, StringHelper.randomId()), Version.LATEST.major, newIOContext(random())); assertTrue(mergedReader != null); From e2421ecc2060c12b045dc51c510b5029b54f5a3b Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Thu, 16 Apr 2020 12:10:02 +0200 Subject: [PATCH 02/15] Add missing files --- .../src/java/org/apache/lucene/index/SegmentCommitInfo.java | 5 +++-- .../src/java/org/apache/lucene/index/IndexSplitter.java | 3 ++- .../java/org/apache/lucene/replicator/nrt/PrimaryNode.java | 2 +- .../org/apache/lucene/index/BaseLiveDocsFormatTestCase.java | 4 ++-- .../org/apache/lucene/index/BaseMergePolicyTestCase.java | 6 +++--- 5 files changed, 11 insertions(+), 9 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentCommitInfo.java b/lucene/core/src/java/org/apache/lucene/index/SegmentCommitInfo.java index f6c634fe2785..8ce4a7b2e121 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SegmentCommitInfo.java +++ b/lucene/core/src/java/org/apache/lucene/index/SegmentCommitInfo.java @@ -90,10 +90,11 @@ public class SegmentCommitInfo { * @param delCount * number of deleted documents in this segment * @param delGen - * deletion generation number (used to name deletion files) + * deletion generation number (used to name deletion files) * @param fieldInfosGen -* FieldInfos generation number (used to name field-infos files) + * FieldInfos generation number (used to name field-infos files) * @param docValuesGen + * DocValues generation number (used to name doc-values updates files) * @param id Id that uniquely identifies this segment commit. This id must be 16 bytes long. See {@link StringHelper#randomId()} */ public SegmentCommitInfo(SegmentInfo info, int delCount, int softDelCount, long delGen, long fieldInfosGen, long docValuesGen, byte[] id) { diff --git a/lucene/misc/src/java/org/apache/lucene/index/IndexSplitter.java b/lucene/misc/src/java/org/apache/lucene/index/IndexSplitter.java index afe45cc734a8..9f9afb4085c1 100644 --- a/lucene/misc/src/java/org/apache/lucene/index/IndexSplitter.java +++ b/lucene/misc/src/java/org/apache/lucene/index/IndexSplitter.java @@ -29,6 +29,7 @@ import java.util.Locale; import org.apache.lucene.store.FSDirectory; +import org.apache.lucene.util.StringHelper; import org.apache.lucene.util.SuppressForbidden; /** @@ -143,7 +144,7 @@ public void split(Path destDir, String[] segs) throws IOException { info.getUseCompoundFile(), info.getCodec(), info.getDiagnostics(), info.getId(), Collections.emptyMap(), null); destInfos.add(new SegmentCommitInfo(newInfo, infoPerCommit.getDelCount(), infoPerCommit.getSoftDelCount(), infoPerCommit.getDelGen(), infoPerCommit.getFieldInfosGen(), - infoPerCommit.getDocValuesGen())); + infoPerCommit.getDocValuesGen(), infoPerCommit.getId())); // now copy files over Collection files = infoPerCommit.files(); for (final String srcName : files) { diff --git a/lucene/replicator/src/java/org/apache/lucene/replicator/nrt/PrimaryNode.java b/lucene/replicator/src/java/org/apache/lucene/replicator/nrt/PrimaryNode.java index f96f6d233950..2d24f9bc4d45 100644 --- a/lucene/replicator/src/java/org/apache/lucene/replicator/nrt/PrimaryNode.java +++ b/lucene/replicator/src/java/org/apache/lucene/replicator/nrt/PrimaryNode.java @@ -245,7 +245,7 @@ private synchronized boolean setCurrentInfos(Set completedMergeFiles) th // Serialize the SegmentInfos. ByteBuffersDataOutput buffer = new ByteBuffersDataOutput(); try (ByteBuffersIndexOutput tmpIndexOutput = new ByteBuffersIndexOutput(buffer, "temporary", "temporary")) { - infos.write(dir, tmpIndexOutput); + infos.write(tmpIndexOutput); } byte[] infosBytes = buffer.toArrayCopy(); diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/BaseLiveDocsFormatTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/index/BaseLiveDocsFormatTestCase.java index 9c01990b1953..4f15bef21bd6 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/index/BaseLiveDocsFormatTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/index/BaseLiveDocsFormatTestCase.java @@ -125,10 +125,10 @@ public int length() { final Directory dir = newDirectory(); final SegmentInfo si = new SegmentInfo(dir, Version.LATEST, Version.LATEST, "foo", maxDoc, random().nextBoolean(), codec, Collections.emptyMap(), StringHelper.randomId(), Collections.emptyMap(), null); - SegmentCommitInfo sci = new SegmentCommitInfo(si, 0, 0, 0, -1, -1); + SegmentCommitInfo sci = new SegmentCommitInfo(si, 0, 0, 0, -1, -1, StringHelper.randomId()); format.writeLiveDocs(bits, dir, sci, maxDoc - numLiveDocs, IOContext.DEFAULT); - sci = new SegmentCommitInfo(si, maxDoc - numLiveDocs, 0, 1, -1, -1); + sci = new SegmentCommitInfo(si, maxDoc - numLiveDocs, 0, 1, -1, -1, StringHelper.randomId()); final Bits bits2 = format.readLiveDocs(dir, sci, IOContext.READONCE); assertEquals(maxDoc, bits2.length()); for (int i = 0; i < maxDoc; ++i) { diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/BaseMergePolicyTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/index/BaseMergePolicyTestCase.java index 9928c8405718..94a85dffcff1 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/index/BaseMergePolicyTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/index/BaseMergePolicyTestCase.java @@ -140,7 +140,7 @@ public void testFindForcedDeletesMerges() throws IOException { Collections.emptyMap(), // attributes null /* indexSort */); info.setFiles(Collections.emptyList()); - infos.add(new SegmentCommitInfo(info, random().nextInt(1), 0, -1, -1, -1)); + infos.add(new SegmentCommitInfo(info, random().nextInt(1), 0, -1, -1, -1, StringHelper.randomId())); } MergePolicy.MergeSpecification forcedDeletesMerges = mp.findForcedDeletesMerges(infos, context); if (forcedDeletesMerges != null) { @@ -208,7 +208,7 @@ protected static SegmentCommitInfo makeSegmentCommitInfo(String name, int maxDoc name, maxDoc, false, TestUtil.getDefaultCodec(), Collections.emptyMap(), id, Collections.singletonMap(IndexWriter.SOURCE, source), null); info.setFiles(Collections.singleton(name + "_size=" + Long.toString((long) (sizeMB * 1024 * 1024)) + ".fake")); - return new SegmentCommitInfo(info, numDeletedDocs, 0, 0, 0, 0); + return new SegmentCommitInfo(info, numDeletedDocs, 0, 0, 0, 0, StringHelper.randomId()); } /** A directory that computes the length of a file based on its name. */ @@ -331,7 +331,7 @@ protected static SegmentInfos applyDeletes(SegmentInfos infos, int numDeletes) { int newDelCount = sci.getDelCount() + segDeletes; assert newDelCount <= sci.info.maxDoc(); if (newDelCount < sci.info.maxDoc()) { // drop fully deleted segments - SegmentCommitInfo newInfo = new SegmentCommitInfo(sci.info, sci.getDelCount() + segDeletes, 0, sci.getDelGen() + 1, sci.getFieldInfosGen(), sci.getDocValuesGen()); + SegmentCommitInfo newInfo = new SegmentCommitInfo(sci.info, sci.getDelCount() + segDeletes, 0, sci.getDelGen() + 1, sci.getFieldInfosGen(), sci.getDocValuesGen(), StringHelper.randomId()); newInfoList.add(newInfo); } numDeletes -= segDeletes; From 9da9d88f48348bb3c204183e09c94501d0184776 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Thu, 16 Apr 2020 15:10:16 +0200 Subject: [PATCH 03/15] add more tests --- .../lucene/index/SegmentCommitInfo.java | 10 ++- .../apache/lucene/index/TestIndexWriter.java | 80 +++++++++++++++++++ 2 files changed, 87 insertions(+), 3 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentCommitInfo.java b/lucene/core/src/java/org/apache/lucene/index/SegmentCommitInfo.java index 8ce4a7b2e121..0bb243281565 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SegmentCommitInfo.java +++ b/lucene/core/src/java/org/apache/lucene/index/SegmentCommitInfo.java @@ -365,7 +365,7 @@ public String toString(int pendingDelCount) { if (softDelCount > 0) { s += " :softDel=" + softDelCount; } - s += " :id=" + StringHelper.idToString(id); + s += " :id=" + StringHelper.idToString(getId()); return s; } @@ -377,7 +377,7 @@ public String toString() { @Override public SegmentCommitInfo clone() { - SegmentCommitInfo other = new SegmentCommitInfo(info, delCount, softDelCount, delGen, fieldInfosGen, docValuesGen, id.clone()); + SegmentCommitInfo other = new SegmentCommitInfo(info, delCount, softDelCount, delGen, fieldInfosGen, docValuesGen, getId()); // Not clear that we need to carry over nextWriteDelGen // (i.e. do we ever clone after a failed write and // before the next successful write?), but just do it to @@ -402,10 +402,14 @@ final int getDelCount(boolean includeSoftDeletes) { private void generationAdvanced() { sizeInBytes = -1; - id = StringHelper.randomId(); + id = null; } public byte[] getId() { + if (id == null) { + // we advanced a generation - need to generate a new ID + id = StringHelper.randomId(); + } return id.clone(); } } diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java index 42a85d00285f..cb02f794aa2b 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java @@ -2484,8 +2484,11 @@ public void testIds() throws Exception { assertEquals(StringHelper.ID_LENGTH, id1.length); byte[] id2 = sis.info(0).info.getId(); + byte[] sciId2 = sis.info(0).getId(); assertNotNull(id2); + assertNotNull(sciId2); assertEquals(StringHelper.ID_LENGTH, id2.length); + assertEquals(StringHelper.ID_LENGTH, sciId2.length); // Make sure CheckIndex includes id output: ByteArrayOutputStream bos = new ByteArrayOutputStream(1024); @@ -4085,4 +4088,81 @@ protected boolean isEnableTestPoints() { assertEquals(maxCompletedSequenceNumber+2, writer.getMaxCompletedSequenceNumber()); } } + + public void testSegmentCommitInfoId() throws IOException { + try (Directory dir = newDirectory(); + IndexWriter writer = new IndexWriter(dir, + new IndexWriterConfig().setMergePolicy(NoMergePolicy.INSTANCE))) { + Document doc = new Document(); + doc.add(new NumericDocValuesField("num", 1)); + doc.add(new StringField("id", "1", Field.Store.NO)); + writer.addDocument(doc); + doc = new Document(); + doc.add(new NumericDocValuesField("num", 1)); + doc.add(new StringField("id", "2", Field.Store.NO)); + writer.addDocument(doc); + writer.commit(); + SegmentInfos segmentCommitInfos = SegmentInfos.readLatestCommit(dir); + byte[] id = segmentCommitInfos.info(0).getId(); + byte[] segInfoId = segmentCommitInfos.info(0).info.getId(); + + writer.updateNumericDocValue(new Term("id", "1"), "num", 2); + writer.commit(); + segmentCommitInfos = SegmentInfos.readLatestCommit(dir); + assertEquals(1, segmentCommitInfos.size()); + assertNotEquals(StringHelper.idToString(id), StringHelper.idToString(segmentCommitInfos.info(0).getId())); + assertEquals(StringHelper.idToString(segInfoId), StringHelper.idToString(segmentCommitInfos.info(0).info.getId())); + id = segmentCommitInfos.info(0).getId(); + writer.addDocument(new Document()); // second segment + writer.commit(); + segmentCommitInfos = SegmentInfos.readLatestCommit(dir); + assertEquals(2, segmentCommitInfos.size()); + assertEquals(StringHelper.idToString(id), StringHelper.idToString(segmentCommitInfos.info(0).getId())); + assertEquals(StringHelper.idToString(segInfoId), StringHelper.idToString(segmentCommitInfos.info(0).info.getId())); + + doc = new Document(); + doc.add(new NumericDocValuesField("num", 5)); + doc.add(new StringField("id", "1", Field.Store.NO)); + writer.updateDocument(new Term("id", "1"), doc); + writer.commit(); + segmentCommitInfos = SegmentInfos.readLatestCommit(dir); + assertEquals(3, segmentCommitInfos.size()); + assertNotEquals(StringHelper.idToString(id), StringHelper.idToString(segmentCommitInfos.info(0).getId())); + assertEquals(StringHelper.idToString(segInfoId), StringHelper.idToString(segmentCommitInfos.info(0).info.getId())); + writer.close(); + try (Directory dir2 = newDirectory(); + IndexWriter writer2 = new IndexWriter(dir2, + new IndexWriterConfig().setMergePolicy(NoMergePolicy.INSTANCE))) { + writer2.addIndexes(dir); + writer2.commit(); + SegmentInfos infos2 = SegmentInfos.readLatestCommit(dir2); + assertEquals(infos2.size(), segmentCommitInfos.size()); + for (int i = 0; i < infos2.size(); i++) { + assertEquals(StringHelper.idToString(infos2.info(i).getId()), StringHelper.idToString(segmentCommitInfos.info(i).getId())); + assertEquals(StringHelper.idToString(infos2.info(i).info.getId()), StringHelper.idToString(segmentCommitInfos.info(i).info.getId())); + } + } + } + + Set ids = new HashSet<>(); + for (int i = 0; i < 2; i++) { + try (Directory dir = newDirectory(); + IndexWriter writer = new IndexWriter(dir, + new IndexWriterConfig().setMergePolicy(NoMergePolicy.INSTANCE))) { + Document doc = new Document(); + doc.add(new NumericDocValuesField("num", 1)); + doc.add(new StringField("id", "1", Field.Store.NO)); + writer.addDocument(doc); + writer.commit(); + SegmentInfos segmentCommitInfos = SegmentInfos.readLatestCommit(dir); + String id = StringHelper.idToString(segmentCommitInfos.info(0).getId()); + assertTrue(ids.add(id)); + writer.updateNumericDocValue(new Term("id", "1"), "num", 2); + writer.commit(); + segmentCommitInfos = SegmentInfos.readLatestCommit(dir); + id = StringHelper.idToString(segmentCommitInfos.info(0).getId()); + assertTrue(ids.add(id)); + } + } + } } From f0a72f82bb17bd2582799aa25514ef764e012570 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Thu, 16 Apr 2020 22:48:03 +0200 Subject: [PATCH 04/15] move to null as an indicator that the segment has no ID --- .../index/TestBackwardsCompatibility.java | 11 ++++++++ .../org/apache/lucene/index/IndexWriter.java | 6 +++++ .../lucene/index/SegmentCommitInfo.java | 26 ++++++++++++++----- .../org/apache/lucene/index/SegmentInfos.java | 12 ++++----- 4 files changed, 42 insertions(+), 13 deletions(-) diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java b/lucene/backward-codecs/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java index 8ef819cedb88..3e068c2d32b6 100644 --- a/lucene/backward-codecs/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java @@ -1364,6 +1364,16 @@ public void testIndexCreatedVersion() throws IOException { } } + public void testSegmentCommitInfoId() throws IOException { + for (String name : oldNames) { + Directory dir = oldIndexDirs.get(name); + SegmentInfos infos = SegmentInfos.readLatestCommit(dir); + for (SegmentCommitInfo info : infos) { + assertNull(info.getId()); + } + } + } + public void verifyUsesDefaultCodec(Directory dir, String name) throws Exception { DirectoryReader r = DirectoryReader.open(dir); for (LeafReaderContext context : r.leaves()) { @@ -1389,6 +1399,7 @@ private int checkAllSegmentsUpgraded(Directory dir, int indexCreatedVersion) thr } for (SegmentCommitInfo si : infos) { assertEquals(Version.LATEST, si.info.getVersion()); + assertNotNull(si.getId()); } assertEquals(Version.LATEST, infos.getCommitLuceneVersion()); assertEquals(indexCreatedVersion, infos.getIndexCreatedVersionMajor()); diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java index 64fe46825f27..a32662153229 100644 --- a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java @@ -3249,6 +3249,12 @@ private long prepareCommitInternal() throws IOException { segmentInfos.setUserData(userData, false); } + // we have to generate a new id for every SegmentCommitInfo + // before we clone otherwise the ID will be lost for subsequent + // commits and SCIs that never had one won't get an ID + for (SegmentCommitInfo info : segmentInfos) { + info.ensureValidId(); + } // Must clone the segmentInfos while we still // hold fullFlushLock and while sync'd so that // no partial changes (eg a delete w/o diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentCommitInfo.java b/lucene/core/src/java/org/apache/lucene/index/SegmentCommitInfo.java index 0bb243281565..7ea01783a189 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SegmentCommitInfo.java +++ b/lucene/core/src/java/org/apache/lucene/index/SegmentCommitInfo.java @@ -85,7 +85,7 @@ public class SegmentCommitInfo { /** * Sole constructor. - * @param info + * @param info * {@link SegmentInfo} that we wrap * @param delCount * number of deleted documents in this segment @@ -108,7 +108,7 @@ public SegmentCommitInfo(SegmentInfo info, int delCount, int softDelCount, long this.docValuesGen = docValuesGen; this.nextWriteDocValuesGen = docValuesGen == -1 ? 1 : docValuesGen + 1; this.id = id; - if (id.length != StringHelper.ID_LENGTH) { + if (id != null && id.length != StringHelper.ID_LENGTH) { throw new IllegalArgumentException("invalid id: " + Arrays.toString(id)); } } @@ -365,7 +365,9 @@ public String toString(int pendingDelCount) { if (softDelCount > 0) { s += " :softDel=" + softDelCount; } - s += " :id=" + StringHelper.idToString(getId()); + if (id != null) { + s += " :id=" + StringHelper.idToString(getId()); + } return s; } @@ -405,11 +407,21 @@ private void generationAdvanced() { id = null; } + /** + * Returns and Id that uniquely identifies this segment commit or null if there is no ID assigned. + * This ID changes each time the the segment changes due to a delete, doc-value or field update. + */ public byte[] getId() { - if (id == null) { - // we advanced a generation - need to generate a new ID - id = StringHelper.randomId(); + return id == null ? null : id.clone(); + } + + /** + * Generates a valid id if there is no id set for this segment commit. + */ + byte[] ensureValidId() { + if (this.id == null) { + this.id = StringHelper.randomId(); } - return id.clone(); + return getId(); } } diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java b/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java index 2e94c8aed435..2dd34eca42f3 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java +++ b/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java @@ -381,8 +381,7 @@ public static final SegmentInfos readCommit(Directory directory, ChecksumIndexIn sciId = new byte[StringHelper.ID_LENGTH]; input.readBytes(sciId, 0, sciId.length); } else { - sciId = infos.id; - // NOCOMMIT can we do this? it would at least give us consistent BWC but we can't identify the same SCI in different commits + sciId = null; } SegmentCommitInfo siPerCommit = new SegmentCommitInfo(info, delCount, softDelCount, delGen, fieldInfosGen, dvGen, sciId); siPerCommit.setFieldInfosFiles(input.readSetOfStrings()); @@ -547,10 +546,10 @@ public void write(IndexOutput out) throws IOException { throw new IllegalStateException("cannot write segment: invalid maxDoc segment=" + si.name + " maxDoc=" + si.maxDoc() + " softDelCount=" + softDelCount); } out.writeInt(softDelCount); - byte[] sciId = siPerCommit.getId(); - if (sciId.length != StringHelper.ID_LENGTH) { - throw new IllegalArgumentException("invalid SegmentCommitInfo#id: " + Arrays.toString(sciId)); - } + // we ensure that there is a valid ID for this SCI just in case + // this is manually upgraded outside of IW + byte[] sciId = siPerCommit.ensureValidId(); + assert sciId != null && sciId.length == StringHelper.ID_LENGTH : "invalid SegmentCommitInfo#id: " + Arrays.toString(sciId); out.writeBytes(sciId, 0, sciId.length); out.writeSetOfStrings(siPerCommit.getFieldInfosFiles()); final Map> dvUpdatesFiles = siPerCommit.getDocValuesUpdatesFiles(); @@ -783,6 +782,7 @@ final void prepareCommit(Directory dir) throws IOException { throw new IllegalStateException("prepareCommit was already called"); } dir.syncMetaData(); + assert segments.stream().filter(i -> i.getId() == null).count() == 0 : "some SegmentCommitInfos have no id"; write(dir); } From ac9f0ea45659a25234df6ca4fc6a4bda506c1b57 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Fri, 17 Apr 2020 12:47:54 +0200 Subject: [PATCH 05/15] fix imports --- .../core/src/test/org/apache/lucene/index/TestSegmentInfos.java | 1 - 1 file changed, 1 deletion(-) diff --git a/lucene/core/src/test/org/apache/lucene/index/TestSegmentInfos.java b/lucene/core/src/test/org/apache/lucene/index/TestSegmentInfos.java index abf1df1ecab6..19d821481e00 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestSegmentInfos.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestSegmentInfos.java @@ -27,7 +27,6 @@ import org.apache.lucene.util.Version; import java.io.IOException; -import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.Map; From bbe3648bbf4086b804705b71c7750f46e9414dc2 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Fri, 17 Apr 2020 13:02:23 +0200 Subject: [PATCH 06/15] add move tests on the bwc end --- .../apache/lucene/index/TestBackwardsCompatibility.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java b/lucene/backward-codecs/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java index c7766fa8c4c4..101fbfcd219d 100644 --- a/lucene/backward-codecs/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java @@ -768,7 +768,6 @@ public void testUnsupportedOldIndexes() throws Exception { writer.close(); } } - writer = null; } ByteArrayOutputStream bos = new ByteArrayOutputStream(1024); @@ -830,8 +829,9 @@ public void testAddOldIndexes() throws IOException { IndexWriter w = new IndexWriter(targetDir, newIndexWriterConfig(new MockAnalyzer(random()))); w.addIndexes(oldDir); w.close(); - targetDir.close(); + SegmentInfos si = SegmentInfos.readLatestCommit(targetDir); + assertEquals(0, si.asList().stream().filter(sci -> sci.getId() == null).count()); if (VERBOSE) { System.out.println("\nTEST: done adding indices; now close"); } @@ -862,7 +862,8 @@ public void testAddOldIndexesReader() throws IOException { TestUtil.addIndexesSlowly(w, reader); w.close(); reader.close(); - + SegmentInfos si = SegmentInfos.readLatestCommit(targetDir); + assertEquals(0, si.asList().stream().filter(sci -> sci.getId() == null).count()); targetDir.close(); } } From 3224de64404f139c8f0a84345bec7a00803676ce Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Fri, 17 Apr 2020 14:08:01 +0200 Subject: [PATCH 07/15] preserve null ID until the SCI actually changes --- .../index/TestBackwardsCompatibility.java | 4 ++-- .../org/apache/lucene/index/IndexWriter.java | 6 ----- .../lucene/index/SegmentCommitInfo.java | 16 ++++++++----- .../org/apache/lucene/index/SegmentInfos.java | 23 +++++++++++++------ .../apache/lucene/index/IndexSplitter.java | 1 - 5 files changed, 28 insertions(+), 22 deletions(-) diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java b/lucene/backward-codecs/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java index 101fbfcd219d..61421b5c3ebb 100644 --- a/lucene/backward-codecs/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java @@ -831,7 +831,7 @@ public void testAddOldIndexes() throws IOException { w.close(); SegmentInfos si = SegmentInfos.readLatestCommit(targetDir); - assertEquals(0, si.asList().stream().filter(sci -> sci.getId() == null).count()); + assertEquals("none of the segments should have been upgraded", 0, si.asList().stream().filter(sci -> sci.getId() != null).count()); if (VERBOSE) { System.out.println("\nTEST: done adding indices; now close"); } @@ -863,7 +863,7 @@ public void testAddOldIndexesReader() throws IOException { w.close(); reader.close(); SegmentInfos si = SegmentInfos.readLatestCommit(targetDir); - assertEquals(0, si.asList().stream().filter(sci -> sci.getId() == null).count()); + assertEquals("all SCIs should have an id now", 0, si.asList().stream().filter(sci -> sci.getId() == null).count()); targetDir.close(); } } diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java index a32662153229..64fe46825f27 100644 --- a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java @@ -3249,12 +3249,6 @@ private long prepareCommitInternal() throws IOException { segmentInfos.setUserData(userData, false); } - // we have to generate a new id for every SegmentCommitInfo - // before we clone otherwise the ID will be lost for subsequent - // commits and SCIs that never had one won't get an ID - for (SegmentCommitInfo info : segmentInfos) { - info.ensureValidId(); - } // Must clone the segmentInfos while we still // hold fullFlushLock and while sync'd so that // no partial changes (eg a delete w/o diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentCommitInfo.java b/lucene/core/src/java/org/apache/lucene/index/SegmentCommitInfo.java index 7ea01783a189..5e0435ebeb54 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SegmentCommitInfo.java +++ b/lucene/core/src/java/org/apache/lucene/index/SegmentCommitInfo.java @@ -83,6 +83,9 @@ public class SegmentCommitInfo { // this is never written to/read from the Directory private long bufferedDeletesGen = -1; + // is set once any of the generations has been advanced. + private boolean hasAdvanced; + /** * Sole constructor. * @param info @@ -365,7 +368,7 @@ public String toString(int pendingDelCount) { if (softDelCount > 0) { s += " :softDel=" + softDelCount; } - if (id != null) { + if (getId() != null) { s += " :id=" + StringHelper.idToString(getId()); } @@ -404,7 +407,7 @@ final int getDelCount(boolean includeSoftDeletes) { private void generationAdvanced() { sizeInBytes = -1; - id = null; + hasAdvanced = true; } /** @@ -412,16 +415,17 @@ private void generationAdvanced() { * This ID changes each time the the segment changes due to a delete, doc-value or field update. */ public byte[] getId() { + maybeAdvanceID(); return id == null ? null : id.clone(); } /** - * Generates a valid id if there is no id set for this segment commit. + * Generates a new Id if this segment commit has changed. */ - byte[] ensureValidId() { - if (this.id == null) { + private void maybeAdvanceID() { + if (hasAdvanced) { + hasAdvanced = false; this.id = StringHelper.randomId(); } - return getId(); } } diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java b/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java index 2dd34eca42f3..932a9efe1e6a 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java +++ b/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java @@ -377,9 +377,13 @@ public static final SegmentInfos readCommit(Directory directory, ChecksumIndexIn throw new CorruptIndexException("invalid deletion count: " + softDelCount + delCount + " vs maxDoc=" + info.maxDoc(), input); } final byte[] sciId; - if (format > VERSION_74) { - sciId = new byte[StringHelper.ID_LENGTH]; - input.readBytes(sciId, 0, sciId.length); + if (format > VERSION_74 ) { + if (input.readByte() == 1) { + sciId = new byte[StringHelper.ID_LENGTH]; + input.readBytes(sciId, 0, sciId.length); + } else { + sciId = null; + } } else { sciId = null; } @@ -548,9 +552,15 @@ public void write(IndexOutput out) throws IOException { out.writeInt(softDelCount); // we ensure that there is a valid ID for this SCI just in case // this is manually upgraded outside of IW - byte[] sciId = siPerCommit.ensureValidId(); - assert sciId != null && sciId.length == StringHelper.ID_LENGTH : "invalid SegmentCommitInfo#id: " + Arrays.toString(sciId); - out.writeBytes(sciId, 0, sciId.length); + byte[] sciId = siPerCommit.getId(); + if (sciId != null) { + out.writeByte((byte)1); + assert sciId.length == StringHelper.ID_LENGTH : "invalid SegmentCommitInfo#id: " + Arrays.toString(sciId); + out.writeBytes(sciId, 0, sciId.length); + } else { + out.writeByte((byte)0); + } + out.writeSetOfStrings(siPerCommit.getFieldInfosFiles()); final Map> dvUpdatesFiles = siPerCommit.getDocValuesUpdatesFiles(); out.writeInt(dvUpdatesFiles.size()); @@ -782,7 +792,6 @@ final void prepareCommit(Directory dir) throws IOException { throw new IllegalStateException("prepareCommit was already called"); } dir.syncMetaData(); - assert segments.stream().filter(i -> i.getId() == null).count() == 0 : "some SegmentCommitInfos have no id"; write(dir); } diff --git a/lucene/misc/src/java/org/apache/lucene/index/IndexSplitter.java b/lucene/misc/src/java/org/apache/lucene/index/IndexSplitter.java index 9f9afb4085c1..efc6ba3b9249 100644 --- a/lucene/misc/src/java/org/apache/lucene/index/IndexSplitter.java +++ b/lucene/misc/src/java/org/apache/lucene/index/IndexSplitter.java @@ -29,7 +29,6 @@ import java.util.Locale; import org.apache.lucene.store.FSDirectory; -import org.apache.lucene.util.StringHelper; import org.apache.lucene.util.SuppressForbidden; /** From 9f160c8b211e8cfad156d3fe122083fa7985bf62 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Fri, 17 Apr 2020 14:24:29 +0200 Subject: [PATCH 08/15] fix luke --- .../java/org/apache/lucene/luke/models/util/IndexUtils.java | 6 ++++-- .../lucene/luke/models/overview/OverviewImplTest.java | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/lucene/luke/src/java/org/apache/lucene/luke/models/util/IndexUtils.java b/lucene/luke/src/java/org/apache/lucene/luke/models/util/IndexUtils.java index e59689a4c29f..71e8070af465 100644 --- a/lucene/luke/src/java/org/apache/lucene/luke/models/util/IndexUtils.java +++ b/lucene/luke/src/java/org/apache/lucene/luke/models/util/IndexUtils.java @@ -303,8 +303,10 @@ protected String doBody(String segmentFileName) throws IOException { format = "Lucene 7.2 or later"; } else if (actualVersion == SegmentInfos.VERSION_74) { format = "Lucene 7.4 or later"; - } else if (actualVersion > SegmentInfos.VERSION_74) { - format = "Lucene 7.4 or later (UNSUPPORTED)"; + } else if (actualVersion == SegmentInfos.VERSION_86) { + format = "Lucene 8.6 or later"; + } else if (actualVersion > SegmentInfos.VERSION_86) { + format = "Lucene 8.6 or later (UNSUPPORTED)"; } } else { format = "Lucene 6.x or prior (UNSUPPORTED)"; diff --git a/lucene/luke/src/test/org/apache/lucene/luke/models/overview/OverviewImplTest.java b/lucene/luke/src/test/org/apache/lucene/luke/models/overview/OverviewImplTest.java index 6e4522b81568..5eb15ef946c7 100644 --- a/lucene/luke/src/test/org/apache/lucene/luke/models/overview/OverviewImplTest.java +++ b/lucene/luke/src/test/org/apache/lucene/luke/models/overview/OverviewImplTest.java @@ -87,7 +87,7 @@ public void testGetIndexVersion() { @Test public void testGetIndexFormat() { OverviewImpl overview = new OverviewImpl(reader, indexDir.toString()); - assertEquals("Lucene 7.4 or later", overview.getIndexFormat().get()); + assertEquals("Lucene 8.6 or later", overview.getIndexFormat().get()); } @Test From a392c740269e0a85fd553a57b819bb0f6513a583 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Fri, 17 Apr 2020 19:40:14 +0200 Subject: [PATCH 09/15] apply feedback --- .../index/TestBackwardsCompatibility.java | 12 +++++++++--- .../apache/lucene/index/SegmentCommitInfo.java | 17 +++-------------- .../org/apache/lucene/index/SegmentInfos.java | 18 ++++++++++++------ 3 files changed, 24 insertions(+), 23 deletions(-) diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java b/lucene/backward-codecs/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java index 61421b5c3ebb..f8ffc4e32ba5 100644 --- a/lucene/backward-codecs/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java @@ -831,7 +831,8 @@ public void testAddOldIndexes() throws IOException { w.close(); SegmentInfos si = SegmentInfos.readLatestCommit(targetDir); - assertEquals("none of the segments should have been upgraded", 0, si.asList().stream().filter(sci -> sci.getId() != null).count()); + assertNull("none of the segments should have been upgraded", + si.asList().stream().filter(sci -> sci.getId() != null).findAny().orElse(null)); if (VERBOSE) { System.out.println("\nTEST: done adding indices; now close"); } @@ -863,7 +864,8 @@ public void testAddOldIndexesReader() throws IOException { w.close(); reader.close(); SegmentInfos si = SegmentInfos.readLatestCommit(targetDir); - assertEquals("all SCIs should have an id now", 0, si.asList().stream().filter(sci -> sci.getId() == null).count()); + assertNull("all SCIs should have an id now", + si.asList().stream().filter(sci -> sci.getId() != null).findAny().orElse(null)); targetDir.close(); } } @@ -1373,7 +1375,11 @@ public void testSegmentCommitInfoId() throws IOException { Directory dir = oldIndexDirs.get(name); SegmentInfos infos = SegmentInfos.readLatestCommit(dir); for (SegmentCommitInfo info : infos) { - assertNull(info.getId()); + if (info.info.getVersion().onOrAfter(Version.LUCENE_9_0_0)) { + assertNotNull(info.toString(), info.getId()); + } else { + assertNull(info.toString(), info.getId()); + } } } } diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentCommitInfo.java b/lucene/core/src/java/org/apache/lucene/index/SegmentCommitInfo.java index 5e0435ebeb54..42b4e81f23d0 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SegmentCommitInfo.java +++ b/lucene/core/src/java/org/apache/lucene/index/SegmentCommitInfo.java @@ -368,8 +368,8 @@ public String toString(int pendingDelCount) { if (softDelCount > 0) { s += " :softDel=" + softDelCount; } - if (getId() != null) { - s += " :id=" + StringHelper.idToString(getId()); + if (this.id != null) { + s += " :id=" + StringHelper.idToString(id); } return s; @@ -407,7 +407,7 @@ final int getDelCount(boolean includeSoftDeletes) { private void generationAdvanced() { sizeInBytes = -1; - hasAdvanced = true; + this.id = StringHelper.randomId(); } /** @@ -415,17 +415,6 @@ private void generationAdvanced() { * This ID changes each time the the segment changes due to a delete, doc-value or field update. */ public byte[] getId() { - maybeAdvanceID(); return id == null ? null : id.clone(); } - - /** - * Generates a new Id if this segment commit has changed. - */ - private void maybeAdvanceID() { - if (hasAdvanced) { - hasAdvanced = false; - this.id = StringHelper.randomId(); - } - } } diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java b/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java index 932a9efe1e6a..f9edccd46f58 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java +++ b/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java @@ -377,12 +377,18 @@ public static final SegmentInfos readCommit(Directory directory, ChecksumIndexIn throw new CorruptIndexException("invalid deletion count: " + softDelCount + delCount + " vs maxDoc=" + info.maxDoc(), input); } final byte[] sciId; - if (format > VERSION_74 ) { - if (input.readByte() == 1) { - sciId = new byte[StringHelper.ID_LENGTH]; - input.readBytes(sciId, 0, sciId.length); - } else { - sciId = null; + if (format > VERSION_74) { + byte marker = input.readByte(); + switch (marker) { + case 1: + sciId = new byte[StringHelper.ID_LENGTH]; + input.readBytes(sciId, 0, sciId.length); + break; + case 0: + sciId = null; + break; + default: + throw new CorruptIndexException("invalid SegmentCommitInfo ID marker: " + marker, input); } } else { sciId = null; From 5af4896cfdc491ea645c24f0392f6739303e654f Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Fri, 17 Apr 2020 19:55:50 +0200 Subject: [PATCH 10/15] fix assertion --- .../org/apache/lucene/index/TestBackwardsCompatibility.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java b/lucene/backward-codecs/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java index f8ffc4e32ba5..ebcfdbfd757f 100644 --- a/lucene/backward-codecs/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java @@ -865,7 +865,7 @@ public void testAddOldIndexesReader() throws IOException { reader.close(); SegmentInfos si = SegmentInfos.readLatestCommit(targetDir); assertNull("all SCIs should have an id now", - si.asList().stream().filter(sci -> sci.getId() != null).findAny().orElse(null)); + si.asList().stream().filter(sci -> sci.getId() == null).findAny().orElse(null)); targetDir.close(); } } From f211f9717881e876a404b9bb7236e6f469b2245d Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Fri, 17 Apr 2020 20:13:19 +0200 Subject: [PATCH 11/15] remove leftover --- .../src/java/org/apache/lucene/index/SegmentCommitInfo.java | 3 --- 1 file changed, 3 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentCommitInfo.java b/lucene/core/src/java/org/apache/lucene/index/SegmentCommitInfo.java index 42b4e81f23d0..0750287ff074 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SegmentCommitInfo.java +++ b/lucene/core/src/java/org/apache/lucene/index/SegmentCommitInfo.java @@ -83,9 +83,6 @@ public class SegmentCommitInfo { // this is never written to/read from the Directory private long bufferedDeletesGen = -1; - // is set once any of the generations has been advanced. - private boolean hasAdvanced; - /** * Sole constructor. * @param info From 788df6796cf0b6c6aec9ae7e16ce71f613b525a7 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Fri, 17 Apr 2020 23:15:23 +0200 Subject: [PATCH 12/15] add missing 8.6 version constant --- .../apache/lucene/index/TestBackwardsCompatibility.java | 2 +- lucene/core/src/java/org/apache/lucene/util/Version.java | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java b/lucene/backward-codecs/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java index ebcfdbfd757f..df68acd2b233 100644 --- a/lucene/backward-codecs/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java @@ -1375,7 +1375,7 @@ public void testSegmentCommitInfoId() throws IOException { Directory dir = oldIndexDirs.get(name); SegmentInfos infos = SegmentInfos.readLatestCommit(dir); for (SegmentCommitInfo info : infos) { - if (info.info.getVersion().onOrAfter(Version.LUCENE_9_0_0)) { + if (info.info.getVersion().onOrAfter(Version.LUCENE_8_6_0)) { assertNotNull(info.toString(), info.getId()); } else { assertNull(info.toString(), info.getId()); diff --git a/lucene/core/src/java/org/apache/lucene/util/Version.java b/lucene/core/src/java/org/apache/lucene/util/Version.java index aa1f3487da53..5ed1a959da30 100644 --- a/lucene/core/src/java/org/apache/lucene/util/Version.java +++ b/lucene/core/src/java/org/apache/lucene/util/Version.java @@ -102,6 +102,13 @@ public final class Version { @Deprecated public static final Version LUCENE_8_5_1 = new Version(8, 5, 1); + /** + * Match settings and bugs in Lucene's 8.6.0 release. + * @deprecated Use latest + */ + @Deprecated + public static final Version LUCENE_8_6_0 = new Version(8, 6, 0); + /** * Match settings and bugs in Lucene's 9.0.0 release. *

From 383703c2bbf6c50fcd31627b13d8d8472fc68561 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Sat, 18 Apr 2020 13:14:52 +0200 Subject: [PATCH 13/15] remove this --- .../src/java/org/apache/lucene/index/SegmentCommitInfo.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentCommitInfo.java b/lucene/core/src/java/org/apache/lucene/index/SegmentCommitInfo.java index 0750287ff074..954894bb79fe 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SegmentCommitInfo.java +++ b/lucene/core/src/java/org/apache/lucene/index/SegmentCommitInfo.java @@ -404,7 +404,7 @@ final int getDelCount(boolean includeSoftDeletes) { private void generationAdvanced() { sizeInBytes = -1; - this.id = StringHelper.randomId(); + id = StringHelper.randomId(); } /** From b352f912c5236c87b615c1374bbae50bc78de07a Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Sat, 18 Apr 2020 13:32:33 +0200 Subject: [PATCH 14/15] fix test --- .../org/apache/lucene/index/TestBackwardsCompatibility.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java b/lucene/backward-codecs/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java index df68acd2b233..245cef1b2a09 100644 --- a/lucene/backward-codecs/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java @@ -832,7 +832,9 @@ public void testAddOldIndexes() throws IOException { SegmentInfos si = SegmentInfos.readLatestCommit(targetDir); assertNull("none of the segments should have been upgraded", - si.asList().stream().filter(sci -> sci.getId() != null).findAny().orElse(null)); + si.asList().stream().filter( // depending on the MergePolicy we might see these segments merged away + sci -> sci.getId() != null && sci.info.getVersion().onOrAfter(Version.LUCENE_8_6_0) == false + ).findAny().orElse(null)); if (VERBOSE) { System.out.println("\nTEST: done adding indices; now close"); } From 9a41b2b33274fbc2551b33be16d0fe07d1e68e96 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Sat, 18 Apr 2020 14:24:19 +0200 Subject: [PATCH 15/15] add changes entry --- lucene/CHANGES.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 6cf4eb4896a2..8171b3b79172 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -143,6 +143,9 @@ Improvements * LUCENE-9304: Removed ThreadState abstraction from DocumentsWriter which allows pooling of DWPT directly and improves the approachability of the IndexWriter code. (Simon Willnauer) +* LUCENE-9324: Add an ID to SegmentCommitInfo in order to compare commits for equality and make + snapshots incremental on generational files. (Simon Willnauer, Mike Mccandless, Adrien Grant) + Optimizations ---------------------