diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 6cf4eb4896a2..8171b3b79172 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -143,6 +143,9 @@ Improvements * LUCENE-9304: Removed ThreadState abstraction from DocumentsWriter which allows pooling of DWPT directly and improves the approachability of the IndexWriter code. (Simon Willnauer) +* LUCENE-9324: Add an ID to SegmentCommitInfo in order to compare commits for equality and make + snapshots incremental on generational files. (Simon Willnauer, Mike McCandless, Adrien Grand) + Optimizations --------------------- diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java b/lucene/backward-codecs/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java index 26d7ee082574..245cef1b2a09 100644 --- a/lucene/backward-codecs/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java @@ -768,7 +768,6 @@ public void testUnsupportedOldIndexes() throws Exception { writer.close(); } } - writer = null; } ByteArrayOutputStream bos = new ByteArrayOutputStream(1024); @@ -830,8 +829,12 @@ public void testAddOldIndexes() throws IOException { IndexWriter w = new IndexWriter(targetDir, newIndexWriterConfig(new MockAnalyzer(random()))); w.addIndexes(oldDir); w.close(); - targetDir.close(); + SegmentInfos si = SegmentInfos.readLatestCommit(targetDir); + assertNull("none of the segments should have been upgraded", + si.asList().stream().filter( // depending on the MergePolicy we might see these segments merged away + sci -> sci.getId() != null && sci.info.getVersion().onOrAfter(Version.LUCENE_8_6_0) == false + ).findAny().orElse(null)); if (VERBOSE) { System.out.println("\nTEST: done adding indices; now close"); } @@ -862,7 +865,9 @@ public void testAddOldIndexesReader() throws IOException { TestUtil.addIndexesSlowly(w, reader); w.close(); reader.close(); - + 
SegmentInfos si = SegmentInfos.readLatestCommit(targetDir); + assertNull("all SCIs should have an id now", + si.asList().stream().filter(sci -> sci.getId() == null).findAny().orElse(null)); targetDir.close(); } } @@ -1367,6 +1372,20 @@ public void testIndexCreatedVersion() throws IOException { } } + public void testSegmentCommitInfoId() throws IOException { + for (String name : oldNames) { + Directory dir = oldIndexDirs.get(name); + SegmentInfos infos = SegmentInfos.readLatestCommit(dir); + for (SegmentCommitInfo info : infos) { + if (info.info.getVersion().onOrAfter(Version.LUCENE_8_6_0)) { + assertNotNull(info.toString(), info.getId()); + } else { + assertNull(info.toString(), info.getId()); + } + } + } + } + public void verifyUsesDefaultCodec(Directory dir, String name) throws Exception { DirectoryReader r = DirectoryReader.open(dir); for (LeafReaderContext context : r.leaves()) { @@ -1392,6 +1411,7 @@ private int checkAllSegmentsUpgraded(Directory dir, int indexCreatedVersion) thr } for (SegmentCommitInfo si : infos) { assertEquals(Version.LATEST, si.info.getVersion()); + assertNotNull(si.getId()); } assertEquals(Version.LATEST, infos.getCommitLuceneVersion()); assertEquals(indexCreatedVersion, infos.getIndexCreatedVersionMajor()); diff --git a/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java b/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java index fdcd7ee42c85..1286c765fbb4 100644 --- a/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java +++ b/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java @@ -419,7 +419,7 @@ FlushedSegment flush(DocumentsWriter.FlushNotifications flushNotifications) thro pendingUpdates.clearDeleteTerms(); segmentInfo.setFiles(new HashSet<>(directory.getCreatedFiles())); - final SegmentCommitInfo segmentInfoPerCommit = new SegmentCommitInfo(segmentInfo, 0, flushState.softDelCountOnFlush, -1L, -1L, -1L); + final SegmentCommitInfo 
segmentInfoPerCommit = new SegmentCommitInfo(segmentInfo, 0, flushState.softDelCountOnFlush, -1L, -1L, -1L, StringHelper.randomId()); if (infoStream.isEnabled("DWPT")) { infoStream.message("DWPT", "new segment has " + (flushState.liveDocs == null ? 0 : flushState.delCountOnFlush) + " deleted docs"); infoStream.message("DWPT", "new segment has " + flushState.softDelCountOnFlush + " soft-deleted docs"); diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java index 99ee5cba615f..64fe46825f27 100644 --- a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java @@ -3003,7 +3003,7 @@ public long addIndexes(CodecReader... readers) throws IOException { notifyAll(); } } - SegmentCommitInfo infoPerCommit = new SegmentCommitInfo(info, 0, numSoftDeleted, -1L, -1L, -1L); + SegmentCommitInfo infoPerCommit = new SegmentCommitInfo(info, 0, numSoftDeleted, -1L, -1L, -1L, StringHelper.randomId()); info.setFiles(new HashSet<>(trackingDir.getCreatedFiles())); trackingDir.clearCreatedFiles(); @@ -3081,7 +3081,7 @@ private SegmentCommitInfo copySegmentAsIs(SegmentCommitInfo info, String segName info.info.getUseCompoundFile(), info.info.getCodec(), info.info.getDiagnostics(), info.info.getId(), info.info.getAttributes(), info.info.getIndexSort()); SegmentCommitInfo newInfoPerCommit = new SegmentCommitInfo(newInfo, info.getDelCount(), info.getSoftDelCount(), info.getDelGen(), - info.getFieldInfosGen(), info.getDocValuesGen()); + info.getFieldInfosGen(), info.getDocValuesGen(), info.getId()); newInfo.setFiles(info.info.files()); newInfoPerCommit.setFieldInfosFiles(info.getFieldInfosFiles()); @@ -4273,7 +4273,7 @@ synchronized private void _mergeInit(MergePolicy.OneMerge merge) throws IOExcept details.put("mergeMaxNumSegments", "" + merge.maxNumSegments); details.put("mergeFactor", Integer.toString(merge.segments.size())); 
setDiagnostics(si, SOURCE_MERGE, details); - merge.setMergeInfo(new SegmentCommitInfo(si, 0, 0, -1L, -1L, -1L)); + merge.setMergeInfo(new SegmentCommitInfo(si, 0, 0, -1L, -1L, -1L, StringHelper.randomId())); if (infoStream.isEnabled("IW")) { infoStream.message("IW", "merge seg=" + merge.info.info.name + " " + segString(merge.segments)); diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentCommitInfo.java b/lucene/core/src/java/org/apache/lucene/index/SegmentCommitInfo.java index 69117574698b..954894bb79fe 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SegmentCommitInfo.java +++ b/lucene/core/src/java/org/apache/lucene/index/SegmentCommitInfo.java @@ -18,6 +18,7 @@ import java.io.IOException; +import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.HashMap; @@ -26,6 +27,8 @@ import java.util.Map.Entry; import java.util.Set; +import org.apache.lucene.util.StringHelper; + /** Embeds a [read-only] SegmentInfo and adds per-commit * fields. * @@ -35,6 +38,9 @@ public class SegmentCommitInfo { /** The {@link SegmentInfo} that we wrap. */ public final SegmentInfo info; + /** Id that uniquely identifies this segment commit. */ + private byte[] id; + // How many deleted docs in the segment: private int delCount; @@ -79,7 +85,6 @@ public class SegmentCommitInfo { /** * Sole constructor. - * * @param info * {@link SegmentInfo} that we wrap * @param delCount @@ -90,8 +95,9 @@ public class SegmentCommitInfo { * FieldInfos generation number (used to name field-infos files) * @param docValuesGen * DocValues generation number (used to name doc-values updates files) + * @param id Id that uniquely identifies this segment commit. This id must be 16 bytes long. 
See {@link StringHelper#randomId()} */ - public SegmentCommitInfo(SegmentInfo info, int delCount, int softDelCount, long delGen, long fieldInfosGen, long docValuesGen) { + public SegmentCommitInfo(SegmentInfo info, int delCount, int softDelCount, long delGen, long fieldInfosGen, long docValuesGen, byte[] id) { this.info = info; this.delCount = delCount; this.softDelCount = softDelCount; @@ -101,6 +107,10 @@ public SegmentCommitInfo(SegmentInfo info, int delCount, int softDelCount, long this.nextWriteFieldInfosGen = fieldInfosGen == -1 ? 1 : fieldInfosGen + 1; this.docValuesGen = docValuesGen; this.nextWriteDocValuesGen = docValuesGen == -1 ? 1 : docValuesGen + 1; + this.id = id; + if (id != null && id.length != StringHelper.ID_LENGTH) { + throw new IllegalArgumentException("invalid id: " + Arrays.toString(id)); + } } /** Returns the per-field DocValues updates files. */ @@ -138,7 +148,7 @@ public void setFieldInfosFiles(Set fieldInfosFiles) { void advanceDelGen() { delGen = nextWriteDelGen; nextWriteDelGen = delGen+1; - sizeInBytes = -1; + generationAdvanced(); } /** Called if there was an exception while writing @@ -162,7 +172,7 @@ void setNextWriteDelGen(long v) { void advanceFieldInfosGen() { fieldInfosGen = nextWriteFieldInfosGen; nextWriteFieldInfosGen = fieldInfosGen + 1; - sizeInBytes = -1; + generationAdvanced(); } /** @@ -187,7 +197,7 @@ void setNextWriteFieldInfosGen(long v) { void advanceDocValuesGen() { docValuesGen = nextWriteDocValuesGen; nextWriteDocValuesGen = docValuesGen + 1; - sizeInBytes = -1; + generationAdvanced(); } /** @@ -251,7 +261,7 @@ long getBufferedDeletesGen() { void setBufferedDeletesGen(long v) { if (bufferedDeletesGen == -1) { bufferedDeletesGen = v; - sizeInBytes = -1; + generationAdvanced(); } else { throw new IllegalStateException("buffered deletes gen should only be set once"); } @@ -355,6 +365,9 @@ public String toString(int pendingDelCount) { if (softDelCount > 0) { s += " :softDel=" + softDelCount; } + if (this.id != null) { 
+ s += " :id=" + StringHelper.idToString(id); + } return s; } @@ -366,7 +379,7 @@ public String toString() { @Override public SegmentCommitInfo clone() { - SegmentCommitInfo other = new SegmentCommitInfo(info, delCount, softDelCount, delGen, fieldInfosGen, docValuesGen); + SegmentCommitInfo other = new SegmentCommitInfo(info, delCount, softDelCount, delGen, fieldInfosGen, docValuesGen, getId()); // Not clear that we need to carry over nextWriteDelGen // (i.e. do we ever clone after a failed write and // before the next successful write?), but just do it to @@ -388,4 +401,17 @@ public SegmentCommitInfo clone() { final int getDelCount(boolean includeSoftDeletes) { return includeSoftDeletes ? getDelCount() + getSoftDelCount() : getDelCount(); } + + private void generationAdvanced() { + sizeInBytes = -1; + id = StringHelper.randomId(); + } + + /** + * Returns an Id that uniquely identifies this segment commit or null if there is no ID assigned. + * This ID changes each time the segment changes due to a delete, doc-value or field update. + */ + public byte[] getId() { + return id == null ? 
null : id.clone(); + } } diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java b/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java index 116c2e11435e..f9edccd46f58 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java +++ b/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java @@ -124,7 +124,9 @@ public final class SegmentInfos implements Cloneable, Iterable info.maxDoc()) { throw new CorruptIndexException("invalid deletion count: " + softDelCount + delCount + " vs maxDoc=" + info.maxDoc(), input); } - SegmentCommitInfo siPerCommit = new SegmentCommitInfo(info, delCount, softDelCount, delGen, fieldInfosGen, dvGen); + final byte[] sciId; + if (format > VERSION_74) { + byte marker = input.readByte(); + switch (marker) { + case 1: + sciId = new byte[StringHelper.ID_LENGTH]; + input.readBytes(sciId, 0, sciId.length); + break; + case 0: + sciId = null; + break; + default: + throw new CorruptIndexException("invalid SegmentCommitInfo ID marker: " + marker, input); + } + } else { + sciId = null; + } + SegmentCommitInfo siPerCommit = new SegmentCommitInfo(info, delCount, softDelCount, delGen, fieldInfosGen, dvGen, sciId); siPerCommit.setFieldInfosFiles(input.readSetOfStrings()); final Map> dvUpdateFiles; final int numDVFields = input.readInt(); @@ -460,7 +479,7 @@ private void write(Directory directory) throws IOException { try { segnOutput = directory.createOutput(segmentFileName, IOContext.DEFAULT); - write(directory, segnOutput); + write(segnOutput); segnOutput.close(); directory.sync(Collections.singleton(segmentFileName)); success = true; @@ -479,7 +498,7 @@ private void write(Directory directory) throws IOException { } /** Write ourselves to the provided {@link IndexOutput} */ - public void write(Directory directory, IndexOutput out) throws IOException { + public void write(IndexOutput out) throws IOException { CodecUtil.writeIndexHeader(out, "segments", VERSION_CURRENT, StringHelper.randomId(), 
Long.toString(generation, Character.MAX_RADIX)); out.writeVInt(Version.LATEST.major); @@ -537,6 +556,17 @@ public void write(Directory directory, IndexOutput out) throws IOException { throw new IllegalStateException("cannot write segment: invalid maxDoc segment=" + si.name + " maxDoc=" + si.maxDoc() + " softDelCount=" + softDelCount); } out.writeInt(softDelCount); + // we ensure that there is a valid ID for this SCI just in case + // this is manually upgraded outside of IW + byte[] sciId = siPerCommit.getId(); + if (sciId != null) { + out.writeByte((byte)1); + assert sciId.length == StringHelper.ID_LENGTH : "invalid SegmentCommitInfo#id: " + Arrays.toString(sciId); + out.writeBytes(sciId, 0, sciId.length); + } else { + out.writeByte((byte)0); + } + out.writeSetOfStrings(siPerCommit.getFieldInfosFiles()); final Map> dvUpdatesFiles = siPerCommit.getDocValuesUpdatesFiles(); out.writeInt(dvUpdatesFiles.size()); diff --git a/lucene/core/src/java/org/apache/lucene/util/Version.java b/lucene/core/src/java/org/apache/lucene/util/Version.java index aa1f3487da53..5ed1a959da30 100644 --- a/lucene/core/src/java/org/apache/lucene/util/Version.java +++ b/lucene/core/src/java/org/apache/lucene/util/Version.java @@ -102,6 +102,13 @@ public final class Version { @Deprecated public static final Version LUCENE_8_5_1 = new Version(8, 5, 1); + /** + * Match settings and bugs in Lucene's 8.6.0 release. + * @deprecated Use latest + */ + @Deprecated + public static final Version LUCENE_8_6_0 = new Version(8, 6, 0); + /** * Match settings and bugs in Lucene's 9.0.0 release. *

diff --git a/lucene/core/src/test/org/apache/lucene/index/TestDoc.java b/lucene/core/src/test/org/apache/lucene/index/TestDoc.java index d7eea7a1ad3b..f7760704e1fa 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestDoc.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestDoc.java @@ -238,7 +238,7 @@ private SegmentCommitInfo merge(Directory dir, SegmentCommitInfo si1, SegmentCom } } - return new SegmentCommitInfo(si, 0, 0, -1L, -1L, -1L); + return new SegmentCommitInfo(si, 0, 0, -1L, -1L, -1L, StringHelper.randomId()); } diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java index 42a85d00285f..cb02f794aa2b 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java @@ -2484,8 +2484,11 @@ public void testIds() throws Exception { assertEquals(StringHelper.ID_LENGTH, id1.length); byte[] id2 = sis.info(0).info.getId(); + byte[] sciId2 = sis.info(0).getId(); assertNotNull(id2); + assertNotNull(sciId2); assertEquals(StringHelper.ID_LENGTH, id2.length); + assertEquals(StringHelper.ID_LENGTH, sciId2.length); // Make sure CheckIndex includes id output: ByteArrayOutputStream bos = new ByteArrayOutputStream(1024); @@ -4085,4 +4088,81 @@ protected boolean isEnableTestPoints() { assertEquals(maxCompletedSequenceNumber+2, writer.getMaxCompletedSequenceNumber()); } } + + public void testSegmentCommitInfoId() throws IOException { + try (Directory dir = newDirectory(); + IndexWriter writer = new IndexWriter(dir, + new IndexWriterConfig().setMergePolicy(NoMergePolicy.INSTANCE))) { + Document doc = new Document(); + doc.add(new NumericDocValuesField("num", 1)); + doc.add(new StringField("id", "1", Field.Store.NO)); + writer.addDocument(doc); + doc = new Document(); + doc.add(new NumericDocValuesField("num", 1)); + doc.add(new StringField("id", "2", Field.Store.NO)); + 
writer.addDocument(doc); + writer.commit(); + SegmentInfos segmentCommitInfos = SegmentInfos.readLatestCommit(dir); + byte[] id = segmentCommitInfos.info(0).getId(); + byte[] segInfoId = segmentCommitInfos.info(0).info.getId(); + + writer.updateNumericDocValue(new Term("id", "1"), "num", 2); + writer.commit(); + segmentCommitInfos = SegmentInfos.readLatestCommit(dir); + assertEquals(1, segmentCommitInfos.size()); + assertNotEquals(StringHelper.idToString(id), StringHelper.idToString(segmentCommitInfos.info(0).getId())); + assertEquals(StringHelper.idToString(segInfoId), StringHelper.idToString(segmentCommitInfos.info(0).info.getId())); + id = segmentCommitInfos.info(0).getId(); + writer.addDocument(new Document()); // second segment + writer.commit(); + segmentCommitInfos = SegmentInfos.readLatestCommit(dir); + assertEquals(2, segmentCommitInfos.size()); + assertEquals(StringHelper.idToString(id), StringHelper.idToString(segmentCommitInfos.info(0).getId())); + assertEquals(StringHelper.idToString(segInfoId), StringHelper.idToString(segmentCommitInfos.info(0).info.getId())); + + doc = new Document(); + doc.add(new NumericDocValuesField("num", 5)); + doc.add(new StringField("id", "1", Field.Store.NO)); + writer.updateDocument(new Term("id", "1"), doc); + writer.commit(); + segmentCommitInfos = SegmentInfos.readLatestCommit(dir); + assertEquals(3, segmentCommitInfos.size()); + assertNotEquals(StringHelper.idToString(id), StringHelper.idToString(segmentCommitInfos.info(0).getId())); + assertEquals(StringHelper.idToString(segInfoId), StringHelper.idToString(segmentCommitInfos.info(0).info.getId())); + writer.close(); + try (Directory dir2 = newDirectory(); + IndexWriter writer2 = new IndexWriter(dir2, + new IndexWriterConfig().setMergePolicy(NoMergePolicy.INSTANCE))) { + writer2.addIndexes(dir); + writer2.commit(); + SegmentInfos infos2 = SegmentInfos.readLatestCommit(dir2); + assertEquals(infos2.size(), segmentCommitInfos.size()); + for (int i = 0; i < infos2.size(); 
i++) { + assertEquals(StringHelper.idToString(infos2.info(i).getId()), StringHelper.idToString(segmentCommitInfos.info(i).getId())); + assertEquals(StringHelper.idToString(infos2.info(i).info.getId()), StringHelper.idToString(segmentCommitInfos.info(i).info.getId())); + } + } + } + + Set ids = new HashSet<>(); + for (int i = 0; i < 2; i++) { + try (Directory dir = newDirectory(); + IndexWriter writer = new IndexWriter(dir, + new IndexWriterConfig().setMergePolicy(NoMergePolicy.INSTANCE))) { + Document doc = new Document(); + doc.add(new NumericDocValuesField("num", 1)); + doc.add(new StringField("id", "1", Field.Store.NO)); + writer.addDocument(doc); + writer.commit(); + SegmentInfos segmentCommitInfos = SegmentInfos.readLatestCommit(dir); + String id = StringHelper.idToString(segmentCommitInfos.info(0).getId()); + assertTrue(ids.add(id)); + writer.updateNumericDocValue(new Term("id", "1"), "num", 2); + writer.commit(); + segmentCommitInfos = SegmentInfos.readLatestCommit(dir); + id = StringHelper.idToString(segmentCommitInfos.info(0).getId()); + assertTrue(ids.add(id)); + } + } + } } diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterThreadsToSegments.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterThreadsToSegments.java index 347cb7d22354..b14a887ae519 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterThreadsToSegments.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterThreadsToSegments.java @@ -332,7 +332,7 @@ public void run() { byte id[] = readSegmentInfoID(dir, fileName); SegmentInfo si = TestUtil.getDefaultCodec().segmentInfoFormat().read(dir, segName, id, IOContext.DEFAULT); si.setCodec(codec); - SegmentCommitInfo sci = new SegmentCommitInfo(si, 0, 0, -1, -1, -1); + SegmentCommitInfo sci = new SegmentCommitInfo(si, 0, 0, -1, -1, -1, StringHelper.randomId()); SegmentReader sr = new SegmentReader(sci, Version.LATEST.major, IOContext.DEFAULT); try { thread0Count += 
sr.docFreq(new Term("field", "threadID0")); diff --git a/lucene/core/src/test/org/apache/lucene/index/TestOneMergeWrappingMergePolicy.java b/lucene/core/src/test/org/apache/lucene/index/TestOneMergeWrappingMergePolicy.java index e240f549ecdf..e0206ccc72d1 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestOneMergeWrappingMergePolicy.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestOneMergeWrappingMergePolicy.java @@ -137,7 +137,7 @@ private static MergePolicy.MergeSpecification createRandomMergeSpecification(Dir Collections.emptyMap(), // attributes null /* indexSort */); final List segments = new LinkedList(); - segments.add(new SegmentCommitInfo(si, 0, 0, 0, 0, 0)); + segments.add(new SegmentCommitInfo(si, 0, 0, 0, 0, 0, StringHelper.randomId())); ms.add(new MergePolicy.OneMerge(segments)); } } diff --git a/lucene/core/src/test/org/apache/lucene/index/TestPendingDeletes.java b/lucene/core/src/test/org/apache/lucene/index/TestPendingDeletes.java index 143f671990b2..841ebe1f969a 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestPendingDeletes.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestPendingDeletes.java @@ -41,7 +41,7 @@ public void testDeleteDoc() throws IOException { Directory dir = new ByteBuffersDirectory(); SegmentInfo si = new SegmentInfo(dir, Version.LATEST, Version.LATEST, "test", 10, false, Codec.getDefault(), Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null); - SegmentCommitInfo commitInfo = new SegmentCommitInfo(si, 0, 0, -1, -1, -1); + SegmentCommitInfo commitInfo = new SegmentCommitInfo(si, 0, 0, -1, -1, -1, StringHelper.randomId()); PendingDeletes deletes = newPendingDeletes(commitInfo); assertNull(deletes.getLiveDocs()); int docToDelete = TestUtil.nextInt(random(), 0, 7); @@ -75,7 +75,7 @@ public void testWriteLiveDocs() throws IOException { Directory dir = new ByteBuffersDirectory(); SegmentInfo si = new SegmentInfo(dir, Version.LATEST, Version.LATEST, "test", 6, false, 
Codec.getDefault(), Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null); - SegmentCommitInfo commitInfo = new SegmentCommitInfo(si, 0, 0, -1, -1, -1); + SegmentCommitInfo commitInfo = new SegmentCommitInfo(si, 0, 0, -1, -1, -1, StringHelper.randomId()); PendingDeletes deletes = newPendingDeletes(commitInfo); assertFalse(deletes.writeLiveDocs(dir)); assertEquals(0, dir.listAll().length); @@ -132,7 +132,7 @@ public void testIsFullyDeleted() throws IOException { Directory dir = new ByteBuffersDirectory(); SegmentInfo si = new SegmentInfo(dir, Version.LATEST, Version.LATEST, "test", 3, false, Codec.getDefault(), Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null); - SegmentCommitInfo commitInfo = new SegmentCommitInfo(si, 0, 0, -1, -1, -1); + SegmentCommitInfo commitInfo = new SegmentCommitInfo(si, 0, 0, -1, -1, -1, StringHelper.randomId()); FieldInfos fieldInfos = FieldInfos.EMPTY; si.getCodec().fieldInfosFormat().write(dir, si, "", fieldInfos, IOContext.DEFAULT); PendingDeletes deletes = newPendingDeletes(commitInfo); diff --git a/lucene/core/src/test/org/apache/lucene/index/TestPendingSoftDeletes.java b/lucene/core/src/test/org/apache/lucene/index/TestPendingSoftDeletes.java index 666b3c4052e1..a7c681189042 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestPendingSoftDeletes.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestPendingSoftDeletes.java @@ -150,7 +150,7 @@ public void testApplyUpdates() throws IOException { Directory dir = new ByteBuffersDirectory(); SegmentInfo si = new SegmentInfo(dir, Version.LATEST, Version.LATEST, "test", 10, false, Codec.getDefault(), Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null); - SegmentCommitInfo commitInfo = new SegmentCommitInfo(si, 0, 0, -1, -1, -1); + SegmentCommitInfo commitInfo = new SegmentCommitInfo(si, 0, 0, -1, -1, -1, StringHelper.randomId()); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig()); for (int i = 0; i < 
si.maxDoc(); i++) { writer.addDocument(new Document()); diff --git a/lucene/core/src/test/org/apache/lucene/index/TestSegmentInfos.java b/lucene/core/src/test/org/apache/lucene/index/TestSegmentInfos.java index f5029b84d83c..19d821481e00 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestSegmentInfos.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestSegmentInfos.java @@ -64,7 +64,7 @@ public void testVersionsOneSegment() throws IOException { Collections.emptyMap(), id, Collections.emptyMap(), null); info.setFiles(Collections.emptySet()); codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT); - SegmentCommitInfo commitInfo = new SegmentCommitInfo(info, 0, 0, -1, -1, -1); + SegmentCommitInfo commitInfo = new SegmentCommitInfo(info, 0, 0, -1, -1, -1, StringHelper.randomId()); sis.add(commitInfo); sis.commit(dir); @@ -86,20 +86,24 @@ public void testVersionsTwoSegments() throws IOException { Collections.emptyMap(), id, Collections.emptyMap(), null); info.setFiles(Collections.emptySet()); codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT); - SegmentCommitInfo commitInfo = new SegmentCommitInfo(info, 0, 0, -1, -1, -1); + SegmentCommitInfo commitInfo = new SegmentCommitInfo(info, 0, 0, -1, -1, -1, StringHelper.randomId()); sis.add(commitInfo); info = new SegmentInfo(dir, Version.LUCENE_9_0_0, Version.LUCENE_9_0_0, "_1", 1, false, Codec.getDefault(), Collections.emptyMap(), id, Collections.emptyMap(), null); info.setFiles(Collections.emptySet()); codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT); - commitInfo = new SegmentCommitInfo(info, 0, 0,-1, -1, -1); + commitInfo = new SegmentCommitInfo(info, 0, 0,-1, -1, -1, StringHelper.randomId()); sis.add(commitInfo); sis.commit(dir); + byte[] commitInfoId0 = sis.info(0).getId(); + byte[] commitInfoId1 = sis.info(1).getId(); sis = SegmentInfos.readLatestCommit(dir); assertEquals(Version.LUCENE_9_0_0, sis.getMinSegmentLuceneVersion()); assertEquals(Version.LATEST, 
sis.getCommitLuceneVersion()); + assertEquals(StringHelper.idToString(commitInfoId0), StringHelper.idToString(sis.info(0).getId())); + assertEquals(StringHelper.idToString(commitInfoId1), StringHelper.idToString(sis.info(1).getId())); dir.close(); } @@ -145,5 +149,34 @@ public void testToString() throws Throwable{ dir.close(); } + + public void testIDChangesOnAdvance() throws IOException { + try (BaseDirectoryWrapper dir = newDirectory()) { + dir.setCheckIndexOnClose(false); + byte id[] = StringHelper.randomId(); + SegmentInfo info = new SegmentInfo(dir, Version.LUCENE_9_0_0, Version.LUCENE_9_0_0, "_0", 1, false, Codec.getDefault(), + Collections.emptyMap(), StringHelper.randomId(), Collections.emptyMap(), null); + SegmentCommitInfo commitInfo = new SegmentCommitInfo(info, 0, 0, -1, -1, -1, id); + assertEquals(StringHelper.idToString(id), StringHelper.idToString(commitInfo.getId())); + commitInfo.advanceDelGen(); + assertNotEquals(StringHelper.idToString(id), StringHelper.idToString(commitInfo.getId())); + + id = commitInfo.getId(); + commitInfo.advanceDocValuesGen(); + assertNotEquals(StringHelper.idToString(id), StringHelper.idToString(commitInfo.getId())); + + id = commitInfo.getId(); + commitInfo.advanceFieldInfosGen(); + assertNotEquals(StringHelper.idToString(id), StringHelper.idToString(commitInfo.getId())); + SegmentCommitInfo clone = commitInfo.clone(); + id = commitInfo.getId(); + assertEquals(StringHelper.idToString(id), StringHelper.idToString(commitInfo.getId())); + assertEquals(StringHelper.idToString(id), StringHelper.idToString(clone.getId())); + + commitInfo.advanceFieldInfosGen(); + assertNotEquals(StringHelper.idToString(id), StringHelper.idToString(commitInfo.getId())); + assertEquals("clone changed but shouldn't", StringHelper.idToString(id), StringHelper.idToString(clone.getId())); + } + } } diff --git a/lucene/core/src/test/org/apache/lucene/index/TestSegmentMerger.java b/lucene/core/src/test/org/apache/lucene/index/TestSegmentMerger.java 
index 34f62c62bc3e..04f357e307e8 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestSegmentMerger.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestSegmentMerger.java @@ -96,7 +96,7 @@ public void testMerge() throws IOException { //Should be able to open a new SegmentReader against the new directory SegmentReader mergedReader = new SegmentReader(new SegmentCommitInfo( mergeState.segmentInfo, - 0, 0, -1L, -1L, -1L), + 0, 0, -1L, -1L, -1L, StringHelper.randomId()), Version.LATEST.major, newIOContext(random())); assertTrue(mergedReader != null); diff --git a/lucene/luke/src/java/org/apache/lucene/luke/models/util/IndexUtils.java b/lucene/luke/src/java/org/apache/lucene/luke/models/util/IndexUtils.java index e59689a4c29f..71e8070af465 100644 --- a/lucene/luke/src/java/org/apache/lucene/luke/models/util/IndexUtils.java +++ b/lucene/luke/src/java/org/apache/lucene/luke/models/util/IndexUtils.java @@ -303,8 +303,10 @@ protected String doBody(String segmentFileName) throws IOException { format = "Lucene 7.2 or later"; } else if (actualVersion == SegmentInfos.VERSION_74) { format = "Lucene 7.4 or later"; - } else if (actualVersion > SegmentInfos.VERSION_74) { - format = "Lucene 7.4 or later (UNSUPPORTED)"; + } else if (actualVersion == SegmentInfos.VERSION_86) { + format = "Lucene 8.6 or later"; + } else if (actualVersion > SegmentInfos.VERSION_86) { + format = "Lucene 8.6 or later (UNSUPPORTED)"; } } else { format = "Lucene 6.x or prior (UNSUPPORTED)"; diff --git a/lucene/luke/src/test/org/apache/lucene/luke/models/overview/OverviewImplTest.java b/lucene/luke/src/test/org/apache/lucene/luke/models/overview/OverviewImplTest.java index 6e4522b81568..5eb15ef946c7 100644 --- a/lucene/luke/src/test/org/apache/lucene/luke/models/overview/OverviewImplTest.java +++ b/lucene/luke/src/test/org/apache/lucene/luke/models/overview/OverviewImplTest.java @@ -87,7 +87,7 @@ public void testGetIndexVersion() { @Test public void testGetIndexFormat() { OverviewImpl 
overview = new OverviewImpl(reader, indexDir.toString()); - assertEquals("Lucene 7.4 or later", overview.getIndexFormat().get()); + assertEquals("Lucene 8.6 or later", overview.getIndexFormat().get()); } @Test diff --git a/lucene/misc/src/java/org/apache/lucene/index/IndexSplitter.java b/lucene/misc/src/java/org/apache/lucene/index/IndexSplitter.java index afe45cc734a8..efc6ba3b9249 100644 --- a/lucene/misc/src/java/org/apache/lucene/index/IndexSplitter.java +++ b/lucene/misc/src/java/org/apache/lucene/index/IndexSplitter.java @@ -143,7 +143,7 @@ public void split(Path destDir, String[] segs) throws IOException { info.getUseCompoundFile(), info.getCodec(), info.getDiagnostics(), info.getId(), Collections.emptyMap(), null); destInfos.add(new SegmentCommitInfo(newInfo, infoPerCommit.getDelCount(), infoPerCommit.getSoftDelCount(), infoPerCommit.getDelGen(), infoPerCommit.getFieldInfosGen(), - infoPerCommit.getDocValuesGen())); + infoPerCommit.getDocValuesGen(), infoPerCommit.getId())); // now copy files over Collection files = infoPerCommit.files(); for (final String srcName : files) { diff --git a/lucene/replicator/src/java/org/apache/lucene/replicator/nrt/PrimaryNode.java b/lucene/replicator/src/java/org/apache/lucene/replicator/nrt/PrimaryNode.java index f96f6d233950..2d24f9bc4d45 100644 --- a/lucene/replicator/src/java/org/apache/lucene/replicator/nrt/PrimaryNode.java +++ b/lucene/replicator/src/java/org/apache/lucene/replicator/nrt/PrimaryNode.java @@ -245,7 +245,7 @@ private synchronized boolean setCurrentInfos(Set completedMergeFiles) th // Serialize the SegmentInfos. 
ByteBuffersDataOutput buffer = new ByteBuffersDataOutput(); try (ByteBuffersIndexOutput tmpIndexOutput = new ByteBuffersIndexOutput(buffer, "temporary", "temporary")) { - infos.write(dir, tmpIndexOutput); + infos.write(tmpIndexOutput); } byte[] infosBytes = buffer.toArrayCopy(); diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/BaseLiveDocsFormatTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/index/BaseLiveDocsFormatTestCase.java index 9c01990b1953..4f15bef21bd6 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/index/BaseLiveDocsFormatTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/index/BaseLiveDocsFormatTestCase.java @@ -125,10 +125,10 @@ public int length() { final Directory dir = newDirectory(); final SegmentInfo si = new SegmentInfo(dir, Version.LATEST, Version.LATEST, "foo", maxDoc, random().nextBoolean(), codec, Collections.emptyMap(), StringHelper.randomId(), Collections.emptyMap(), null); - SegmentCommitInfo sci = new SegmentCommitInfo(si, 0, 0, 0, -1, -1); + SegmentCommitInfo sci = new SegmentCommitInfo(si, 0, 0, 0, -1, -1, StringHelper.randomId()); format.writeLiveDocs(bits, dir, sci, maxDoc - numLiveDocs, IOContext.DEFAULT); - sci = new SegmentCommitInfo(si, maxDoc - numLiveDocs, 0, 1, -1, -1); + sci = new SegmentCommitInfo(si, maxDoc - numLiveDocs, 0, 1, -1, -1, StringHelper.randomId()); final Bits bits2 = format.readLiveDocs(dir, sci, IOContext.READONCE); assertEquals(maxDoc, bits2.length()); for (int i = 0; i < maxDoc; ++i) { diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/BaseMergePolicyTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/index/BaseMergePolicyTestCase.java index 9928c8405718..94a85dffcff1 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/index/BaseMergePolicyTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/index/BaseMergePolicyTestCase.java @@ -140,7 +140,7 @@ public void testFindForcedDeletesMerges() 
throws IOException { Collections.emptyMap(), // attributes null /* indexSort */); info.setFiles(Collections.emptyList()); - infos.add(new SegmentCommitInfo(info, random().nextInt(1), 0, -1, -1, -1)); + infos.add(new SegmentCommitInfo(info, random().nextInt(1), 0, -1, -1, -1, StringHelper.randomId())); } MergePolicy.MergeSpecification forcedDeletesMerges = mp.findForcedDeletesMerges(infos, context); if (forcedDeletesMerges != null) { @@ -208,7 +208,7 @@ protected static SegmentCommitInfo makeSegmentCommitInfo(String name, int maxDoc name, maxDoc, false, TestUtil.getDefaultCodec(), Collections.emptyMap(), id, Collections.singletonMap(IndexWriter.SOURCE, source), null); info.setFiles(Collections.singleton(name + "_size=" + Long.toString((long) (sizeMB * 1024 * 1024)) + ".fake")); - return new SegmentCommitInfo(info, numDeletedDocs, 0, 0, 0, 0); + return new SegmentCommitInfo(info, numDeletedDocs, 0, 0, 0, 0, StringHelper.randomId()); } /** A directory that computes the length of a file based on its name. */ @@ -331,7 +331,7 @@ protected static SegmentInfos applyDeletes(SegmentInfos infos, int numDeletes) { int newDelCount = sci.getDelCount() + segDeletes; assert newDelCount <= sci.info.maxDoc(); if (newDelCount < sci.info.maxDoc()) { // drop fully deleted segments - SegmentCommitInfo newInfo = new SegmentCommitInfo(sci.info, sci.getDelCount() + segDeletes, 0, sci.getDelGen() + 1, sci.getFieldInfosGen(), sci.getDocValuesGen()); + SegmentCommitInfo newInfo = new SegmentCommitInfo(sci.info, sci.getDelCount() + segDeletes, 0, sci.getDelGen() + 1, sci.getFieldInfosGen(), sci.getDocValuesGen(), StringHelper.randomId()); newInfoList.add(newInfo); } numDeletes -= segDeletes;