7 changes: 7 additions & 0 deletions server/src/main/java/org/elasticsearch/index/IndexModule.java
@@ -210,6 +210,13 @@ public Settings getSettings() {
         return indexSettings.getSettings();
     }
 
+    /**
+     * Returns the {@link IndexSettings} for this index
+     */
+    public IndexSettings indexSettings() {
+        return indexSettings;
+    }
 
     /**
      * Returns the index this module is associated with
      */
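Note: the accessor added here is consumed further down in this same PR (see the onIndexModule hunk in OldLuceneVersions). A minimal sketch of the intended call pattern from a plugin, assuming the same version check used below:

    @Override
    public void onIndexModule(IndexModule indexModule) {
        // indexSettings() exposes the full IndexSettings object rather than just the raw
        // Settings map, so plugins can inspect typed values such as the index-created version
        if (indexModule.indexSettings().getIndexVersionCreated().before(Version.CURRENT.minimumIndexCompatibilityVersion())) {
            // register listeners or otherwise adapt the module for very old indices
        }
    }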

@@ -35,7 +35,9 @@
  * Base {@link StoredFieldVisitor} that retrieves all non-redundant metadata.
  */
 public class FieldsVisitor extends FieldNamesProvidingStoredFieldsVisitor {
-    private static final Set<String> BASE_REQUIRED_FIELDS = unmodifiableSet(newHashSet(IdFieldMapper.NAME, RoutingFieldMapper.NAME));
+    private static final Set<String> BASE_REQUIRED_FIELDS = unmodifiableSet(
+        newHashSet("_uid", IdFieldMapper.NAME, RoutingFieldMapper.NAME)
+    );
 
     private final boolean loadSource;
     private final String sourceFieldName;

@@ -103,9 +105,18 @@ public void binaryField(FieldInfo fieldInfo, BytesRef value) {

     @Override
     public void stringField(FieldInfo fieldInfo, String value) {
-        assert IdFieldMapper.NAME.equals(fieldInfo.name) == false : "_id field must go through binaryField";
-        assert sourceFieldName.equals(fieldInfo.name) == false : "source field must go through binaryField";
-        addValue(fieldInfo.name, value);
+        if ("_uid".equals(fieldInfo.name)) {
+            // 5.x-only
+            int delimiterIndex = value.indexOf('#'); // type is not allowed to have # in it..., ids can
+            // type = value.substring(0, delimiterIndex);
+            id = value.substring(delimiterIndex + 1);
+        } else if (IdFieldMapper.NAME.equals(fieldInfo.name)) {
+            // only applies to 5.x indices that have single_type = true
+            id = value;
+        } else {
+            addValue(fieldInfo.name, value);
+        }
     }
 
     @Override
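Note on the _uid branch above: in 5.x indices the stored _uid is the type and id joined as type#id. Since '#' is not allowed in type names but is legal in ids, splitting at the first '#' always recovers the complete id. A self-contained sketch with a hypothetical value (not from the PR):

    String uid = "my_type#deep#nested#id";           // hypothetical stored _uid
    int delimiterIndex = uid.indexOf('#');           // first '#' ends the type name
    String type = uid.substring(0, delimiterIndex);  // "my_type"
    String id = uid.substring(delimiterIndex + 1);   // "deep#nested#id", embedded '#' preserved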

@@ -16,6 +16,7 @@
 import org.elasticsearch.common.io.stream.Writeable;
 import org.elasticsearch.common.unit.ByteSizeValue;
 import org.elasticsearch.common.xcontent.XContentParserUtils;
+import org.elasticsearch.core.Nullable;
 import org.elasticsearch.index.store.StoreFileMetadata;
 import org.elasticsearch.xcontent.ParseField;
 import org.elasticsearch.xcontent.ToXContentFragment;

@@ -41,6 +42,7 @@ public static class FileInfo implements Writeable {
         public static final String SERIALIZE_WRITER_UUID = "serialize_writer_uuid";
 
         private final String name;
+        @Nullable
         private final ByteSizeValue partSize;
         private final long partBytes;
         private final int numberOfParts;

@@ -53,7 +55,7 @@ public static class FileInfo implements Writeable {
          * @param metadata the files meta data
          * @param partSize size of the single chunk
          */
-        public FileInfo(String name, StoreFileMetadata metadata, ByteSizeValue partSize) {
+        public FileInfo(String name, StoreFileMetadata metadata, @Nullable ByteSizeValue partSize) {
             this.name = Objects.requireNonNull(name);
             this.metadata = metadata;
 

@@ -7,18 +7,10 @@

 package org.elasticsearch.xpack.lucene.bwc;
 
-import org.apache.lucene.backward_codecs.lucene70.Lucene70Codec;
-import org.apache.lucene.codecs.Codec;
-import org.apache.lucene.codecs.CodecUtil;
 import org.apache.lucene.index.SegmentCommitInfo;
 import org.apache.lucene.index.SegmentInfo;
 import org.apache.lucene.index.SegmentInfos;
-import org.apache.lucene.store.ChecksumIndexInput;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.IOContext;
-import org.apache.lucene.util.StringHelper;
-import org.apache.lucene.util.Version;
-import org.elasticsearch.Build;
+import org.elasticsearch.Version;
 import org.elasticsearch.common.UUIDs;
 import org.elasticsearch.common.lucene.Lucene;
 import org.elasticsearch.index.IndexModule;

@@ -28,6 +20,7 @@
 import org.elasticsearch.index.shard.IndexShard;
 import org.elasticsearch.plugins.IndexStorePlugin;
 import org.elasticsearch.plugins.Plugin;
+import org.elasticsearch.xpack.lucene.bwc.codecs.BWCCodec;
 
 import java.io.IOException;
 import java.io.UncheckedIOException;

@@ -38,37 +31,38 @@ public class OldLuceneVersions extends Plugin implements IndexStorePlugin {

     @Override
     public void onIndexModule(IndexModule indexModule) {
-        if (Build.CURRENT.isSnapshot()) {
+        if (indexModule.indexSettings().getIndexVersionCreated().before(Version.CURRENT.minimumIndexCompatibilityVersion())) {
             indexModule.addIndexEventListener(new IndexEventListener() {
                 @Override
                 public void afterFilesRestoredFromRepository(IndexShard indexShard) {
-                    maybeConvertToNewFormat(indexShard);
+                    convertToNewFormat(indexShard);
                 }
             });
         }
     }
 
-    private static void maybeConvertToNewFormat(IndexShard indexShard) {
+    /**
+     * The trick used to allow newer Lucene versions to read older Lucene indices is to convert the old directory to a directory that new
+     * Lucene versions happily operate on. The way newer Lucene versions happily comply with reading older data is to put in place a
+     * segments file that the newer Lucene version can open, using codecs that allow reading everything from the old files, making it
+     * available under the newer interfaces. The way this works is to read in the old segments file using a special class
+     * {@link OldSegmentInfos} that supports reading older Lucene {@link SegmentInfos}, and then write out an updated segments file that
+     * newer Lucene versions can understand.
+     */
+    private static void convertToNewFormat(IndexShard indexShard) {
         indexShard.store().incRef();
         try {
             try {
-                Version version = getLuceneVersion(indexShard.store().directory());
-                // Lucene version in [7.0.0, 8.0.0)
-                if (version != null
-                    && version.onOrAfter(Version.fromBits(7, 0, 0))
-                    && version.onOrAfter(Version.fromBits(8, 0, 0)) == false) {
-                    final OldSegmentInfos oldSegmentInfos = OldSegmentInfos.readLatestCommit(indexShard.store().directory(), 7);
-                    final SegmentInfos segmentInfos = convertLucene7x(oldSegmentInfos);
-                    // write upgraded segments file
-                    segmentInfos.commit(indexShard.store().directory());
+                final OldSegmentInfos oldSegmentInfos = OldSegmentInfos.readLatestCommit(indexShard.store().directory(), 0);
+                final SegmentInfos segmentInfos = convertToNewerLuceneVersion(oldSegmentInfos);
+                // write upgraded segments file
+                segmentInfos.commit(indexShard.store().directory());
 
-                    // validate that what we have written can be read using standard path
-                    // TODO: norelease: remove this when development completes
-                    SegmentInfos segmentInfos1 = SegmentInfos.readLatestCommit(indexShard.store().directory());
+                // what we have written can be read using standard path
+                assert SegmentInfos.readLatestCommit(indexShard.store().directory()) != null;
 
-                    // clean older segments file
-                    Lucene.pruneUnreferencedFiles(segmentInfos1.getSegmentsFileName(), indexShard.store().directory());
-                }
+                // clean older segments file
+                Lucene.pruneUnreferencedFiles(segmentInfos.getSegmentsFileName(), indexShard.store().directory());
             } catch (IOException e) {
                 throw new UncheckedIOException(e);
             }

@@ -77,44 +71,31 @@ private static void maybeConvertToNewFormat(IndexShard indexShard) {
         }
     }
 
-    private static Version getLuceneVersion(Directory directory) throws IOException {
-        final String segmentFileName = SegmentInfos.getLastCommitSegmentsFileName(directory);
-        if (segmentFileName != null) {
-            long generation = SegmentInfos.generationFromSegmentsFileName(segmentFileName);
-            try (ChecksumIndexInput input = directory.openChecksumInput(segmentFileName, IOContext.READ)) {
-                CodecUtil.checkHeader(input, "segments", 0, Integer.MAX_VALUE);
-                byte[] id = new byte[StringHelper.ID_LENGTH];
-                input.readBytes(id, 0, id.length);
-                CodecUtil.checkIndexHeaderSuffix(input, Long.toString(generation, Character.MAX_RADIX));
-
-                Version luceneVersion = Version.fromBits(input.readVInt(), input.readVInt(), input.readVInt());
-                int indexCreatedVersion = input.readVInt();
-                return luceneVersion;
-            } catch (Exception e) {
-                // ignore
-            }
-        }
-        return null;
-    }
-
-    private static SegmentInfos convertLucene7x(OldSegmentInfos oldSegmentInfos) {
+    private static SegmentInfos convertToNewerLuceneVersion(OldSegmentInfos oldSegmentInfos) {
         final SegmentInfos segmentInfos = new SegmentInfos(org.apache.lucene.util.Version.LATEST.major);
         segmentInfos.setNextWriteGeneration(oldSegmentInfos.getGeneration() + 1);
         final Map<String, String> map = new HashMap<>(oldSegmentInfos.getUserData());
-        map.put(Engine.HISTORY_UUID_KEY, UUIDs.randomBase64UUID());
-        map.put(SequenceNumbers.LOCAL_CHECKPOINT_KEY, Long.toString(SequenceNumbers.NO_OPS_PERFORMED));
-        map.put(SequenceNumbers.MAX_SEQ_NO, Long.toString(SequenceNumbers.NO_OPS_PERFORMED));
-        map.put(Engine.MAX_UNSAFE_AUTO_ID_TIMESTAMP_COMMIT_ID, "-1");
+        if (map.containsKey(Engine.HISTORY_UUID_KEY) == false) {
+            map.put(Engine.HISTORY_UUID_KEY, UUIDs.randomBase64UUID());
+        }
+        if (map.containsKey(SequenceNumbers.LOCAL_CHECKPOINT_KEY) == false) {
+            map.put(SequenceNumbers.LOCAL_CHECKPOINT_KEY, Long.toString(SequenceNumbers.NO_OPS_PERFORMED));
+        }
+        if (map.containsKey(SequenceNumbers.MAX_SEQ_NO) == false) {
+            map.put(SequenceNumbers.MAX_SEQ_NO, Long.toString(SequenceNumbers.NO_OPS_PERFORMED));
+        }
+        if (map.containsKey(Engine.MAX_UNSAFE_AUTO_ID_TIMESTAMP_COMMIT_ID) == false) {
+            map.put(Engine.MAX_UNSAFE_AUTO_ID_TIMESTAMP_COMMIT_ID, "-1");
+        }
         segmentInfos.setUserData(map, true);
         for (SegmentCommitInfo infoPerCommit : oldSegmentInfos.asList()) {
-            SegmentInfo info = infoPerCommit.info;
-            SegmentInfo newInfo = wrap(info);
+            final SegmentInfo newInfo = BWCCodec.wrap(infoPerCommit.info);
 
             segmentInfos.add(
                 new SegmentCommitInfo(
                     newInfo,
                     infoPerCommit.getDelCount(),
-                    0,
+                    infoPerCommit.getSoftDelCount(),
                     infoPerCommit.getDelGen(),
                     infoPerCommit.getFieldInfosGen(),
                     infoPerCommit.getDocValuesGen(),

@@ -125,31 +106,6 @@ private static SegmentInfos convertLucene7x(OldSegmentInfos oldSegmentInfos) {
         return segmentInfos;
     }
 
-    static SegmentInfo wrap(SegmentInfo segmentInfo) {
-        // Use Version.LATEST instead of original version, otherwise SegmentCommitInfo will bark when processing (N-1 limitation)
-        // TODO: alternatively store the original version information in attributes?
-        byte[] id = segmentInfo.getId();
-        if (id == null) {
-            id = StringHelper.randomId();
-        }
-        Codec codec = segmentInfo.getCodec() instanceof Lucene70Codec ? new BWCLucene70Codec() : segmentInfo.getCodec();
-        SegmentInfo segmentInfo1 = new SegmentInfo(
-            segmentInfo.dir,
-            org.apache.lucene.util.Version.LATEST,
-            org.apache.lucene.util.Version.LATEST,
-            segmentInfo.name,
-            segmentInfo.maxDoc(),
-            segmentInfo.getUseCompoundFile(),
-            codec,
-            segmentInfo.getDiagnostics(),
-            id,
-            segmentInfo.getAttributes(),
-            null
-        );
-        segmentInfo1.setFiles(segmentInfo.files());
-        return segmentInfo1;
-    }
-
     @Override
     public Map<String, DirectoryFactory> getDirectoryFactories() {
         return Map.of();
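Note: the javadoc on convertToNewFormat describes the end state: once the rewritten segments file is committed, the stock Lucene entry point can read the commit directly, and the conditional containsKey checks presumably avoid clobbering markers that newer commits already carry, only filling in defaults for truly old ones. A minimal sketch of that readability check, mirroring the assert in the method and assuming a hypothetical shard path (not part of the PR):

    import java.nio.file.Path;
    import org.apache.lucene.index.SegmentInfos;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;

    public class ConvertedCommitCheck {
        public static void main(String[] args) throws Exception {
            // hypothetical location of a restored shard's Lucene files
            try (Directory dir = FSDirectory.open(Path.of("/tmp/restored-shard/index"))) {
                SegmentInfos infos = SegmentInfos.readLatestCommit(dir);
                // the user data now includes the history UUID and sequence-number markers set during conversion
                System.out.println("generation=" + infos.getGeneration() + " userData=" + infos.getUserData());
            }
        }
    }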

@@ -60,6 +60,12 @@
 @SuppressWarnings("CheckStyle")
 @SuppressForbidden(reason = "Lucene class")
 public class OldSegmentInfos implements Cloneable, Iterable<SegmentCommitInfo> {
+
+    /**
+     * Adds the {@link Version} that committed this segments_N file, as well as the {@link Version}
+     * of the oldest segment, since 5.3+
+     */
+    public static final int VERSION_53 = 6;
     /**
      * The version that added information about the Lucene version at the time when the index has been
      * created.

@@ -209,13 +215,16 @@ static final OldSegmentInfos readCommit(Directory directory, ChecksumIndexInput
             if (magic != CodecUtil.CODEC_MAGIC) {
                 throw new IndexFormatTooOldException(input, magic, CodecUtil.CODEC_MAGIC, CodecUtil.CODEC_MAGIC);
             }
-            format = CodecUtil.checkHeaderNoMagic(input, "segments", VERSION_70, VERSION_CURRENT);
+            format = CodecUtil.checkHeaderNoMagic(input, "segments", VERSION_53, VERSION_CURRENT);
             byte[] id = new byte[StringHelper.ID_LENGTH];
             input.readBytes(id, 0, id.length);
             CodecUtil.checkIndexHeaderSuffix(input, Long.toString(generation, Character.MAX_RADIX));
 
             Version luceneVersion = Version.fromBits(input.readVInt(), input.readVInt(), input.readVInt());
-            int indexCreatedVersion = input.readVInt();
+            int indexCreatedVersion = 6;
+            if (format >= VERSION_70) {
+                indexCreatedVersion = input.readVInt();
+            }
             if (luceneVersion.major < indexCreatedVersion) {
                 throw new CorruptIndexException(
                     "Creation version ["

@@ -252,7 +261,7 @@ static final OldSegmentInfos readCommit(Directory directory, ChecksumIndexInput
         } catch (Throwable t) {
             priorE = t;
         } finally {
-            if (format >= VERSION_70) { // oldest supported version
+            if (format >= VERSION_53) { // oldest supported version
                 CodecUtil.checkFooter(input, priorE);
             } else {
                 throw IOUtils.rethrowAlways(priorE);

@@ -283,6 +292,14 @@ private static void parseSegmentInfos(Directory directory, DataInput input, OldS
         long totalDocs = 0;
         for (int seg = 0; seg < numSegments; seg++) {
             String segName = input.readString();
+            if (format < VERSION_70) {
+                byte hasID = input.readByte();
+                if (hasID == 0) {
+                    throw new IndexFormatTooOldException(input, "Segment is from Lucene 4.x");
+                } else if (hasID != 1) {
+                    throw new CorruptIndexException("invalid hasID byte, got: " + hasID, input);
+                }
+            }
             byte[] segmentID = new byte[StringHelper.ID_LENGTH];
             input.readBytes(segmentID, 0, segmentID.length);
             Codec codec = readCodec(input);
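Note on the VERSION_53 gating: a segments_N file begins with the codec magic, the codec name "segments" and a format number. Commits written by Lucene 5.3+ use format 6 (VERSION_53), while the index-created version only follows the Lucene version triple from format 7 (VERSION_70) onwards, which is why readCommit defaults it to 6 for older formats. A minimal header-reading sketch along the lines of the code above, assuming an already-open Directory that contains a commit named "segments_2" (hypothetical):

    // assumes: Directory directory, containing a commit file "segments_2"
    long generation = SegmentInfos.generationFromSegmentsFileName("segments_2");
    try (ChecksumIndexInput input = directory.openChecksumInput("segments_2", IOContext.READ)) {
        // verifies magic and codec name, returns the format number; 6 = VERSION_53
        int format = CodecUtil.checkHeader(input, "segments", 6, Integer.MAX_VALUE);
        byte[] id = new byte[StringHelper.ID_LENGTH];
        input.readBytes(id, 0, id.length);
        CodecUtil.checkIndexHeaderSuffix(input, Long.toString(generation, Character.MAX_RADIX));
        Version luceneVersion = Version.fromBits(input.readVInt(), input.readVInt(), input.readVInt());
        int indexCreatedVersion = format >= 7 ? input.readVInt() : 6; // recorded only since VERSION_70
    }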