34
34
import java .util .Optional ;
35
35
import java .util .Set ;
36
36
import java .util .TreeSet ;
37
+ import java .util .concurrent .atomic .AtomicBoolean ;
37
38
import java .util .concurrent .atomic .AtomicLong ;
38
39
import java .util .function .BooleanSupplier ;
39
40
import java .util .function .LongConsumer ;
@@ -61,6 +62,7 @@ public class RemoteFsTimestampAwareTranslog extends RemoteFsTranslog {
61
62
private final Map <String , Tuple <Long , Long >> oldFormatMetadataFileGenerationMap ;
62
63
private final Map <String , Tuple <Long , Long >> oldFormatMetadataFilePrimaryTermMap ;
63
64
private final AtomicLong minPrimaryTermInRemote = new AtomicLong (Long .MAX_VALUE );
65
+ private final AtomicBoolean triggerTrimOnMinRemoteGenReferencedChange = new AtomicBoolean (false );
64
66
65
67
public RemoteFsTimestampAwareTranslog (
66
68
TranslogConfig config ,
@@ -105,6 +107,11 @@ protected void onDelete() {
105
107
}
106
108
}
107
109
110
+ @ Override
111
+ protected void onMinRemoteGenReferencedChange () {
112
+ triggerTrimOnMinRemoteGenReferencedChange .set (true );
113
+ }
114
+
108
115
@ Override
109
116
public void trimUnreferencedReaders () throws IOException {
110
117
trimUnreferencedReaders (false , true );
@@ -135,14 +142,22 @@ protected void trimUnreferencedReaders(boolean indexDeleted, boolean trimLocal)
135
142
136
143
// This is to ensure that after the permits are acquired during primary relocation, there are no further modification on remote
137
144
// store.
138
- if (startedPrimarySupplier .getAsBoolean () == false || pauseSync .get ()) {
145
+ if (indexDeleted == false && ( startedPrimarySupplier .getAsBoolean () == false || pauseSync .get () )) {
139
146
return ;
140
147
}
141
148
142
149
// This is to fail fast and avoid listing md files un-necessarily.
143
150
if (indexDeleted == false && RemoteStoreUtils .isPinnedTimestampStateStale ()) {
144
- logger .warn ("Skipping remote segment store garbage collection as last fetch of pinned timestamp is stale" );
151
+ logger .warn ("Skipping remote translog garbage collection as last fetch of pinned timestamp is stale" );
152
+ return ;
153
+ }
154
+
155
+ // This code block ensures parity with RemoteFsTranslog. Without this, we will end up making list translog metadata
156
+ // call in each invocation of trimUnreferencedReaders
157
+ if (indexDeleted == false && triggerTrimOnMinRemoteGenReferencedChange .get () == false ) {
145
158
return ;
159
+ } else if (triggerTrimOnMinRemoteGenReferencedChange .get ()) {
160
+ triggerTrimOnMinRemoteGenReferencedChange .set (false );
146
161
}
147
162
148
163
// Since remote generation deletion is async, this ensures that only one generation deletion happens at a time.
@@ -158,24 +173,20 @@ public void onResponse(List<BlobMetadata> blobMetadata) {
158
173
List <String > metadataFiles = blobMetadata .stream ().map (BlobMetadata ::name ).collect (Collectors .toList ());
159
174
160
175
try {
161
- if (metadataFiles .size () <= 1 ) {
176
+ if (indexDeleted == false && metadataFiles .size () <= 1 ) {
162
177
logger .debug ("No stale translog metadata files found" );
163
178
remoteGenerationDeletionPermits .release (REMOTE_DELETION_PERMITS );
164
179
return ;
165
180
}
166
181
167
182
// Check last fetch status of pinned timestamps. If stale, return.
168
183
if (indexDeleted == false && RemoteStoreUtils .isPinnedTimestampStateStale ()) {
169
- logger .warn ("Skipping remote segment store garbage collection as last fetch of pinned timestamp is stale" );
184
+ logger .warn ("Skipping remote translog garbage collection as last fetch of pinned timestamp is stale" );
170
185
remoteGenerationDeletionPermits .release (REMOTE_DELETION_PERMITS );
171
186
return ;
172
187
}
173
188
174
- List <String > metadataFilesToBeDeleted = getMetadataFilesToBeDeleted (
175
- metadataFiles ,
176
- metadataFilePinnedTimestampMap ,
177
- logger
178
- );
189
+ List <String > metadataFilesToBeDeleted = getMetadataFilesToBeDeleted (metadataFiles , indexDeleted );
179
190
180
191
// If index is not deleted, make sure to keep latest metadata file
181
192
if (indexDeleted == false ) {
@@ -194,10 +205,11 @@ public void onResponse(List<BlobMetadata> blobMetadata) {
194
205
metadataFilesNotToBeDeleted .removeAll (metadataFilesToBeDeleted );
195
206
196
207
logger .debug (() -> "metadataFilesNotToBeDeleted = " + metadataFilesNotToBeDeleted );
208
+
197
209
Set <Long > generationsToBeDeleted = getGenerationsToBeDeleted (
198
210
metadataFilesNotToBeDeleted ,
199
211
metadataFilesToBeDeleted ,
200
- indexDeleted
212
+ indexDeleted ? Long . MAX_VALUE : getMinGenerationToKeepInRemote ()
201
213
);
202
214
203
215
logger .debug (() -> "generationsToBeDeleted = " + generationsToBeDeleted );
@@ -208,7 +220,11 @@ public void onResponse(List<BlobMetadata> blobMetadata) {
208
220
generationsToBeDeleted ,
209
221
remoteGenerationDeletionPermits ::release
210
222
);
223
+ } else {
224
+ remoteGenerationDeletionPermits .release ();
225
+ }
211
226
227
+ if (metadataFilesToBeDeleted .isEmpty () == false ) {
212
228
// Delete stale metadata files
213
229
translogTransferManager .deleteMetadataFilesAsync (
214
230
metadataFilesToBeDeleted ,
@@ -217,11 +233,10 @@ public void onResponse(List<BlobMetadata> blobMetadata) {
217
233
218
234
// Update cache to keep only those metadata files that are not getting deleted
219
235
oldFormatMetadataFileGenerationMap .keySet ().retainAll (metadataFilesNotToBeDeleted );
220
-
221
236
// Delete stale primary terms
222
237
deleteStaleRemotePrimaryTerms (metadataFilesNotToBeDeleted );
223
238
} else {
224
- remoteGenerationDeletionPermits .release (REMOTE_DELETION_PERMITS );
239
+ remoteGenerationDeletionPermits .release ();
225
240
}
226
241
} catch (Exception e ) {
227
242
remoteGenerationDeletionPermits .release (REMOTE_DELETION_PERMITS );
@@ -237,18 +252,16 @@ public void onFailure(Exception e) {
237
252
translogTransferManager .listTranslogMetadataFilesAsync (listMetadataFilesListener );
238
253
}
239
254
255
+ private long getMinGenerationToKeepInRemote () {
256
+ return minRemoteGenReferenced - indexSettings ().getRemoteTranslogExtraKeep ();
257
+ }
258
+
240
259
// Visible for testing
241
260
protected Set <Long > getGenerationsToBeDeleted (
242
261
List <String > metadataFilesNotToBeDeleted ,
243
262
List <String > metadataFilesToBeDeleted ,
244
- boolean indexDeleted
263
+ long minGenerationToKeepInRemote
245
264
) throws IOException {
246
- long maxGenerationToBeDeleted = Long .MAX_VALUE ;
247
-
248
- if (indexDeleted == false ) {
249
- maxGenerationToBeDeleted = minRemoteGenReferenced - 1 - indexSettings ().getRemoteTranslogExtraKeep ();
250
- }
251
-
252
265
Set <Long > generationsFromMetadataFilesToBeDeleted = new HashSet <>();
253
266
for (String mdFile : metadataFilesToBeDeleted ) {
254
267
Tuple <Long , Long > minMaxGen = getMinMaxTranslogGenerationFromMetadataFile (mdFile , translogTransferManager );
@@ -262,21 +275,31 @@ protected Set<Long> getGenerationsToBeDeleted(
262
275
Set <Long > generationsToBeDeleted = new HashSet <>();
263
276
for (long generation : generationsFromMetadataFilesToBeDeleted ) {
264
277
// Check if the generation is not referred by metadata file matching pinned timestamps
265
- if (generation <= maxGenerationToBeDeleted && isGenerationPinned (generation , pinnedGenerations ) == false ) {
278
+ // The check with minGenerationToKeep is redundant but kept as to make sure we don't delete generations
279
+ // that are not persisted in remote segment store yet.
280
+ if (generation < minGenerationToKeepInRemote && isGenerationPinned (generation , pinnedGenerations ) == false ) {
266
281
generationsToBeDeleted .add (generation );
267
282
}
268
283
}
269
284
return generationsToBeDeleted ;
270
285
}
271
286
272
- protected List <String > getMetadataFilesToBeDeleted (List <String > metadataFiles ) {
273
- return getMetadataFilesToBeDeleted (metadataFiles , metadataFilePinnedTimestampMap , logger );
287
+ protected List <String > getMetadataFilesToBeDeleted (List <String > metadataFiles , boolean indexDeleted ) {
288
+ return getMetadataFilesToBeDeleted (
289
+ metadataFiles ,
290
+ metadataFilePinnedTimestampMap ,
291
+ getMinGenerationToKeepInRemote (),
292
+ indexDeleted ,
293
+ logger
294
+ );
274
295
}
275
296
276
297
// Visible for testing
277
298
protected static List <String > getMetadataFilesToBeDeleted (
278
299
List <String > metadataFiles ,
279
300
Map <Long , String > metadataFilePinnedTimestampMap ,
301
+ long minGenerationToKeepInRemote ,
302
+ boolean indexDeleted ,
280
303
Logger logger
281
304
) {
282
305
Tuple <Long , Set <Long >> pinnedTimestampsState = RemoteStorePinnedTimestampService .getPinnedTimestamps ();
@@ -312,6 +335,22 @@ protected static List<String> getMetadataFilesToBeDeleted(
312
335
metadataFilesToBeDeleted .size ()
313
336
);
314
337
338
+ if (indexDeleted == false ) {
339
+ // Filter out metadata files based on minGenerationToKeep
340
+ List <String > metadataFilesContainingMinGenerationToKeep = metadataFilesToBeDeleted .stream ().filter (md -> {
341
+ long maxGeneration = TranslogTransferMetadata .getMaxGenerationFromFileName (md );
342
+ return maxGeneration == -1 || maxGeneration >= minGenerationToKeepInRemote ;
343
+ }).collect (Collectors .toList ());
344
+ metadataFilesToBeDeleted .removeAll (metadataFilesContainingMinGenerationToKeep );
345
+
346
+ logger .trace (
347
+ "metadataFilesContainingMinGenerationToKeep.size = {}, metadataFilesToBeDeleted based on minGenerationToKeep filtering = {}, minGenerationToKeep = {}" ,
348
+ metadataFilesContainingMinGenerationToKeep .size (),
349
+ metadataFilesToBeDeleted .size (),
350
+ minGenerationToKeepInRemote
351
+ );
352
+ }
353
+
315
354
return metadataFilesToBeDeleted ;
316
355
}
317
356
@@ -472,50 +511,60 @@ protected static Tuple<Long, Long> getMinMaxPrimaryTermFromMetadataFile(
472
511
}
473
512
}
474
513
475
- public static void cleanup (TranslogTransferManager translogTransferManager ) throws IOException {
476
- ActionListener <List <BlobMetadata >> listMetadataFilesListener = new ActionListener <>() {
477
- @ Override
478
- public void onResponse (List <BlobMetadata > blobMetadata ) {
479
- List <String > metadataFiles = blobMetadata .stream ().map (BlobMetadata ::name ).collect (Collectors .toList ());
514
+ public static void cleanupOfDeletedIndex (TranslogTransferManager translogTransferManager , boolean forceClean ) throws IOException {
515
+ if (forceClean ) {
516
+ translogTransferManager .delete ();
517
+ } else {
518
+ ActionListener <List <BlobMetadata >> listMetadataFilesListener = new ActionListener <>() {
519
+ @ Override
520
+ public void onResponse (List <BlobMetadata > blobMetadata ) {
521
+ List <String > metadataFiles = blobMetadata .stream ().map (BlobMetadata ::name ).collect (Collectors .toList ());
522
+
523
+ try {
524
+ if (metadataFiles .isEmpty ()) {
525
+ staticLogger .debug ("No stale translog metadata files found" );
526
+ return ;
527
+ }
528
+ List <String > metadataFilesToBeDeleted = getMetadataFilesToBeDeleted (
529
+ metadataFiles ,
530
+ new HashMap <>(),
531
+ Long .MAX_VALUE ,
532
+ true , // This method gets called when the index is no longer present
533
+ staticLogger
534
+ );
535
+ if (metadataFilesToBeDeleted .isEmpty ()) {
536
+ staticLogger .debug ("No metadata files to delete" );
537
+ return ;
538
+ }
539
+ staticLogger .debug (() -> "metadataFilesToBeDeleted = " + metadataFilesToBeDeleted );
480
540
481
- try {
482
- if (metadataFiles .isEmpty ()) {
483
- staticLogger .debug ("No stale translog metadata files found" );
484
- return ;
485
- }
486
- List <String > metadataFilesToBeDeleted = getMetadataFilesToBeDeleted (metadataFiles , new HashMap <>(), staticLogger );
487
- if (metadataFilesToBeDeleted .isEmpty ()) {
488
- staticLogger .debug ("No metadata files to delete" );
489
- return ;
490
- }
491
- staticLogger .debug (() -> "metadataFilesToBeDeleted = " + metadataFilesToBeDeleted );
541
+ // For all the files that we are keeping, fetch min and max generations
542
+ List <String > metadataFilesNotToBeDeleted = new ArrayList <>(metadataFiles );
543
+ metadataFilesNotToBeDeleted .removeAll (metadataFilesToBeDeleted );
544
+ staticLogger .debug (() -> "metadataFilesNotToBeDeleted = " + metadataFilesNotToBeDeleted );
492
545
493
- // For all the files that we are keeping, fetch min and max generations
494
- List <String > metadataFilesNotToBeDeleted = new ArrayList <>(metadataFiles );
495
- metadataFilesNotToBeDeleted .removeAll (metadataFilesToBeDeleted );
496
- staticLogger .debug (() -> "metadataFilesNotToBeDeleted = " + metadataFilesNotToBeDeleted );
546
+ // Delete stale metadata files
547
+ translogTransferManager .deleteMetadataFilesAsync (metadataFilesToBeDeleted , () -> {});
497
548
498
- // Delete stale metadata files
499
- translogTransferManager .deleteMetadataFilesAsync (metadataFilesToBeDeleted , () -> {});
549
+ // Delete stale primary terms
550
+ deleteStaleRemotePrimaryTerms (
551
+ metadataFilesNotToBeDeleted ,
552
+ translogTransferManager ,
553
+ new HashMap <>(),
554
+ new AtomicLong (Long .MAX_VALUE ),
555
+ staticLogger
556
+ );
557
+ } catch (Exception e ) {
558
+ staticLogger .error ("Exception while cleaning up metadata and primary terms" , e );
559
+ }
560
+ }
500
561
501
- // Delete stale primary terms
502
- deleteStaleRemotePrimaryTerms (
503
- metadataFilesNotToBeDeleted ,
504
- translogTransferManager ,
505
- new HashMap <>(),
506
- new AtomicLong (Long .MAX_VALUE ),
507
- staticLogger
508
- );
509
- } catch (Exception e ) {
562
+ @ Override
563
+ public void onFailure (Exception e ) {
510
564
staticLogger .error ("Exception while cleaning up metadata and primary terms" , e );
511
565
}
512
- }
513
-
514
- @ Override
515
- public void onFailure (Exception e ) {
516
- staticLogger .error ("Exception while cleaning up metadata and primary terms" , e );
517
- }
518
- };
519
- translogTransferManager .listTranslogMetadataFilesAsync (listMetadataFilesListener );
566
+ };
567
+ translogTransferManager .listTranslogMetadataFilesAsync (listMetadataFilesListener );
568
+ }
520
569
}
521
570
}
0 commit comments