Skip to content

Commit 133f56c

Browse files
committed
Do not expose hard-deleted docs in Lucene history (#32333)
Today when reading operation history in Lucene, we read all documents. However, if indexing a document is aborted, IndexWriter will hard-delete it; we, therefore, need to exclude that document from Lucene history. This commit makes sure that we exclude aborted documents by using the hard liveDocs of a SegmentReader if there are deletes.
1 parent 5f9492d commit 133f56c

File tree

3 files changed

+118
-33
lines changed

3 files changed

+118
-33
lines changed

server/src/main/java/org/elasticsearch/common/lucene/Lucene.java

Lines changed: 26 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -837,7 +837,8 @@ public int length() {
837837
}
838838

839839
/**
840-
* Wraps a directory reader to include all live docs.
840+
* Wraps a directory reader to make all documents live except those were rolled back
841+
* or hard-deleted due to non-aborting exceptions during indexing.
841842
* The wrapped reader can be used to query all documents.
842843
*
843844
* @param in the input directory reader
@@ -848,17 +849,21 @@ public static DirectoryReader wrapAllDocsLive(DirectoryReader in) throws IOExcep
848849
}
849850

850851
private static final class DirectoryReaderWithAllLiveDocs extends FilterDirectoryReader {
851-
static final class SubReaderWithAllLiveDocs extends FilterLeafReader {
852-
SubReaderWithAllLiveDocs(LeafReader in) {
852+
static final class LeafReaderWithLiveDocs extends FilterLeafReader {
853+
final Bits liveDocs;
854+
final int numDocs;
855+
LeafReaderWithLiveDocs(LeafReader in, Bits liveDocs, int numDocs) {
853856
super(in);
857+
this.liveDocs = liveDocs;
858+
this.numDocs = numDocs;
854859
}
855860
@Override
856861
public Bits getLiveDocs() {
857-
return null;
862+
return liveDocs;
858863
}
859864
@Override
860865
public int numDocs() {
861-
return maxDoc();
866+
return numDocs;
862867
}
863868
@Override
864869
public CacheHelper getCoreCacheHelper() {
@@ -869,14 +874,28 @@ public CacheHelper getReaderCacheHelper() {
869874
return null; // Modifying liveDocs
870875
}
871876
}
877+
872878
DirectoryReaderWithAllLiveDocs(DirectoryReader in) throws IOException {
873-
super(in, new FilterDirectoryReader.SubReaderWrapper() {
879+
super(in, new SubReaderWrapper() {
874880
@Override
875881
public LeafReader wrap(LeafReader leaf) {
876-
return new SubReaderWithAllLiveDocs(leaf);
882+
SegmentReader segmentReader = segmentReader(leaf);
883+
Bits hardLiveDocs = segmentReader.getHardLiveDocs();
884+
if (hardLiveDocs == null) {
885+
return new LeafReaderWithLiveDocs(leaf, null, leaf.maxDoc());
886+
}
887+
// TODO: Avoid recalculate numDocs everytime.
888+
int numDocs = 0;
889+
for (int i = 0; i < hardLiveDocs.length(); i++) {
890+
if (hardLiveDocs.get(i)) {
891+
numDocs++;
892+
}
893+
}
894+
return new LeafReaderWithLiveDocs(segmentReader, hardLiveDocs, numDocs);
877895
}
878896
});
879897
}
898+
880899
@Override
881900
protected DirectoryReader doWrapDirectoryReader(DirectoryReader in) throws IOException {
882901
return wrapAllDocsLive(in);

server/src/test/java/org/elasticsearch/common/lucene/LuceneTests.java

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,18 +33,23 @@
3333
import org.apache.lucene.index.NoMergePolicy;
3434
import org.apache.lucene.index.RandomIndexWriter;
3535
import org.apache.lucene.index.SegmentInfos;
36+
import org.apache.lucene.index.SoftDeletesRetentionMergePolicy;
3637
import org.apache.lucene.index.Term;
3738
import org.apache.lucene.search.IndexSearcher;
3839
import org.apache.lucene.search.MatchAllDocsQuery;
40+
import org.apache.lucene.search.ScoreDoc;
3941
import org.apache.lucene.search.TermQuery;
42+
import org.apache.lucene.search.TopDocs;
4043
import org.apache.lucene.search.Weight;
4144
import org.apache.lucene.store.Directory;
4245
import org.apache.lucene.store.MMapDirectory;
4346
import org.apache.lucene.store.MockDirectoryWrapper;
4447
import org.apache.lucene.util.Bits;
48+
import org.elasticsearch.core.internal.io.IOUtils;
4549
import org.elasticsearch.test.ESTestCase;
4650

4751
import java.io.IOException;
52+
import java.io.StringReader;
4853
import java.util.ArrayList;
4954
import java.util.Collections;
5055
import java.util.HashSet;
@@ -53,6 +58,8 @@
5358
import java.util.concurrent.CountDownLatch;
5459
import java.util.concurrent.atomic.AtomicBoolean;
5560

61+
import static org.hamcrest.Matchers.equalTo;
62+
5663
public class LuceneTests extends ESTestCase {
5764
public void testWaitForIndex() throws Exception {
5865
final MockDirectoryWrapper dir = newMockDirectory();
@@ -406,4 +413,88 @@ public void testMMapHackSupported() throws Exception {
406413
// add assume's here if needed for certain platforms, but we should know if it does not work.
407414
assertTrue("MMapDirectory does not support unmapping: " + MMapDirectory.UNMAP_NOT_SUPPORTED_REASON, MMapDirectory.UNMAP_SUPPORTED);
408415
}
416+
417+
public void testWrapAllDocsLive() throws Exception {
418+
Directory dir = newDirectory();
419+
IndexWriterConfig config = newIndexWriterConfig().setSoftDeletesField(Lucene.SOFT_DELETE_FIELD)
420+
.setMergePolicy(new SoftDeletesRetentionMergePolicy(Lucene.SOFT_DELETE_FIELD, MatchAllDocsQuery::new, newMergePolicy()));
421+
IndexWriter writer = new IndexWriter(dir, config);
422+
int numDocs = between(1, 10);
423+
Set<String> liveDocs = new HashSet<>();
424+
for (int i = 0; i < numDocs; i++) {
425+
String id = Integer.toString(i);
426+
Document doc = new Document();
427+
doc.add(new StringField("id", id, Store.YES));
428+
writer.addDocument(doc);
429+
liveDocs.add(id);
430+
}
431+
for (int i = 0; i < numDocs; i++) {
432+
if (randomBoolean()) {
433+
String id = Integer.toString(i);
434+
Document doc = new Document();
435+
doc.add(new StringField("id", "v2-" + id, Store.YES));
436+
if (randomBoolean()) {
437+
doc.add(Lucene.newSoftDeleteField());
438+
}
439+
writer.softUpdateDocument(new Term("id", id), doc, Lucene.newSoftDeleteField());
440+
liveDocs.add("v2-" + id);
441+
}
442+
}
443+
try (DirectoryReader unwrapped = DirectoryReader.open(writer)) {
444+
DirectoryReader reader = Lucene.wrapAllDocsLive(unwrapped);
445+
assertThat(reader.numDocs(), equalTo(liveDocs.size()));
446+
IndexSearcher searcher = new IndexSearcher(reader);
447+
Set<String> actualDocs = new HashSet<>();
448+
TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), Integer.MAX_VALUE);
449+
for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
450+
actualDocs.add(reader.document(scoreDoc.doc).get("id"));
451+
}
452+
assertThat(actualDocs, equalTo(liveDocs));
453+
}
454+
IOUtils.close(writer, dir);
455+
}
456+
457+
public void testWrapLiveDocsNotExposeAbortedDocuments() throws Exception {
458+
Directory dir = newDirectory();
459+
IndexWriterConfig config = newIndexWriterConfig().setSoftDeletesField(Lucene.SOFT_DELETE_FIELD)
460+
.setMergePolicy(new SoftDeletesRetentionMergePolicy(Lucene.SOFT_DELETE_FIELD, MatchAllDocsQuery::new, newMergePolicy()));
461+
IndexWriter writer = new IndexWriter(dir, config);
462+
int numDocs = between(1, 10);
463+
List<String> liveDocs = new ArrayList<>();
464+
for (int i = 0; i < numDocs; i++) {
465+
String id = Integer.toString(i);
466+
Document doc = new Document();
467+
doc.add(new StringField("id", id, Store.YES));
468+
if (randomBoolean()) {
469+
doc.add(Lucene.newSoftDeleteField());
470+
}
471+
writer.addDocument(doc);
472+
liveDocs.add(id);
473+
}
474+
int abortedDocs = between(1, 10);
475+
for (int i = 0; i < abortedDocs; i++) {
476+
try {
477+
Document doc = new Document();
478+
doc.add(new StringField("id", "aborted-" + i, Store.YES));
479+
StringReader reader = new StringReader("");
480+
doc.add(new TextField("other", reader));
481+
reader.close(); // mark the indexing hit non-aborting error
482+
writer.addDocument(doc);
483+
fail("index should have failed");
484+
} catch (Exception ignored) { }
485+
}
486+
try (DirectoryReader unwrapped = DirectoryReader.open(writer)) {
487+
DirectoryReader reader = Lucene.wrapAllDocsLive(unwrapped);
488+
assertThat(reader.maxDoc(), equalTo(numDocs + abortedDocs));
489+
assertThat(reader.numDocs(), equalTo(liveDocs.size()));
490+
IndexSearcher searcher = new IndexSearcher(reader);
491+
List<String> actualDocs = new ArrayList<>();
492+
TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), Integer.MAX_VALUE);
493+
for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
494+
actualDocs.add(reader.document(scoreDoc.doc).get("id"));
495+
}
496+
assertThat(actualDocs, equalTo(liveDocs));
497+
}
498+
IOUtils.close(writer, dir);
499+
}
409500
}

server/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java

Lines changed: 1 addition & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@
6565
import org.elasticsearch.common.lease.Releasable;
6666
import org.elasticsearch.common.lease.Releasables;
6767
import org.elasticsearch.common.lucene.Lucene;
68+
import org.elasticsearch.common.settings.IndexScopedSettings;
6869
import org.elasticsearch.common.settings.Settings;
6970
import org.elasticsearch.common.util.concurrent.AbstractRunnable;
7071
import org.elasticsearch.common.util.concurrent.ConcurrentCollections;
@@ -3087,30 +3088,4 @@ public void testSupplyTombstoneDoc() throws Exception {
30873088

30883089
closeShards(shard);
30893090
}
3090-
3091-
public void testSearcherIncludesSoftDeletes() throws Exception {
3092-
Settings settings = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
3093-
.put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 1)
3094-
.put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1)
3095-
.put(IndexSettings.INDEX_SOFT_DELETES_SETTING.getKey(), true)
3096-
.build();
3097-
IndexMetaData metaData = IndexMetaData.builder("test")
3098-
.putMapping("test", "{ \"properties\": { \"foo\": { \"type\": \"text\"}}}")
3099-
.settings(settings)
3100-
.primaryTerm(0, 1).build();
3101-
IndexShard shard = newShard(new ShardId(metaData.getIndex(), 0), true, "n1", metaData, null);
3102-
recoverShardFromStore(shard);
3103-
indexDoc(shard, "test", "0", "{\"foo\" : \"bar\"}");
3104-
indexDoc(shard, "test", "1", "{\"foo\" : \"baz\"}");
3105-
deleteDoc(shard, "test", "0");
3106-
shard.refresh("test");
3107-
try (Engine.Searcher searcher = shard.acquireSearcher("test")) {
3108-
IndexSearcher searchWithSoftDeletes = new IndexSearcher(Lucene.wrapAllDocsLive(searcher.getDirectoryReader()));
3109-
assertThat(searcher.searcher().search(new TermQuery(new Term("foo", "bar")), 10).totalHits, equalTo(0L));
3110-
assertThat(searchWithSoftDeletes.search(new TermQuery(new Term("foo", "bar")), 10).totalHits, equalTo(1L));
3111-
assertThat(searcher.searcher().search(new TermQuery(new Term("foo", "baz")), 10).totalHits, equalTo(1L));
3112-
assertThat(searchWithSoftDeletes.search(new TermQuery(new Term("foo", "baz")), 10).totalHits, equalTo(1L));
3113-
}
3114-
closeShards(shard);
3115-
}
31163091
}

0 commit comments

Comments
 (0)