Skip to content

Commit bc3ef9a

Browse files
author
Daniel Iancu
committed
OAK-11444: [full-gc] Save document id and empty properties names before deletion
1 parent 2f32f64 commit bc3ef9a

File tree

5 files changed

+330
-20
lines changed

5 files changed

+330
-20
lines changed

oak-run/src/main/java/org/apache/jackrabbit/oak/run/RevisionsCommand.java

+8-8
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ private static class RevisionsOptions extends Utils.NodeStoreOptions {
121121
final OptionSpec<Long> olderThan;
122122
final OptionSpec<Double> delay;
123123
final OptionSpec<Double> fullGcDelayFactor;
124-
final OptionSpec<Long> fullGcMaxAge;
124+
final OptionSpec<Long> fullGcMaxAgeSec;
125125
final OptionSpec<?> continuous;
126126
final OptionSpec<?> fullGCOnly;
127127
final OptionSpec<Boolean> resetFullGC;
@@ -191,10 +191,10 @@ private static class RevisionsOptions extends Utils.NodeStoreOptions {
191191
fullGcProgressSize = parser.accepts("fullGcProgressSize", "The number of documents to check for " +
192192
"garbage in each Full GC cycle")
193193
.withRequiredArg().ofType(Integer.class).defaultsTo(10000);
194-
fullGcMaxAge = parser.accepts("fullGcMaxAge", "The maximum age of the document in seconds " +
194+
fullGcMaxAgeSec = parser.accepts("fullGcMaxAge", "The maximum age of the document in seconds " +
195195
"to be considered for Full GC i.e. Version Garbage Collector (Full GC) logic will only consider those " +
196-
"nodes for Full GC which are not accessed recently (currentTime - lastModifiedTime > fullGcMaxAge)")
197-
.withOptionalArg().ofType(Long.class).defaultsTo(TimeUnit.DAYS.toMillis(1));
196+
"nodes for Full GC which are not accessed recently (currentTime - lastModifiedTime > fullGcMaxAge). Default value is 86400 (oneday)")
197+
.withOptionalArg().ofType(Long.class).defaultsTo(TimeUnit.DAYS.toSeconds(1));
198198
}
199199

200200
public RevisionsOptions parse(String[] args) {
@@ -238,8 +238,8 @@ int getFullGcProgressSize() {
238238
return fullGcProgressSize.value(options);
239239
}
240240

241-
long getFullGcMaxAge() {
242-
return fullGcMaxAge.value(options);
241+
long getFullGcMaxAgeSec() {
242+
return fullGcMaxAgeSec.value(options);
243243
}
244244

245245
double getFullGcDelayFactor() {
@@ -357,7 +357,7 @@ private VersionGarbageCollector bootstrapVGC(RevisionsOptions options, Closer cl
357357
builder.setFullGCDelayFactor(options.getFullGcDelayFactor());
358358
builder.setFullGCBatchSize(options.getFullGcBatchSize());
359359
builder.setFullGCProgressSize(options.getFullGcProgressSize());
360-
builder.setFullGcMaxAgeMillis(SECONDS.toMillis(options.getFullGcMaxAge()));
360+
builder.setFullGcMaxAgeMillis(SECONDS.toMillis(options.getFullGcMaxAgeSec()));
361361

362362
// create a VersionGCSupport while builder is read-write
363363
VersionGCSupport gcSupport = builder.createVersionGCSupport();
@@ -389,7 +389,7 @@ private VersionGarbageCollector bootstrapVGC(RevisionsOptions options, Closer cl
389389
System.out.println("FullGcDelayFactory is : " + options.getFullGcDelayFactor());
390390
System.out.println("FullGcBatchSize is : " + options.getFullGcBatchSize());
391391
System.out.println("FullGcProgressSize is : " + options.getFullGcProgressSize());
392-
System.out.println("FullGcMaxAgeInSecs is : " + options.getFullGcMaxAge());
392+
System.out.println("FullGcMaxAgeInSecs is : " + options.getFullGcMaxAgeSec());
393393
System.out.println("FullGcMaxAgeMillis is : " + builder.getFullGcMaxAgeMillis());
394394
VersionGarbageCollector gc = createVersionGC(builder.build(), gcSupport, options.isDryRun(), builder);
395395

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
package org.apache.jackrabbit.oak.plugins.document;
18+
19+
import org.slf4j.Logger;
20+
import static org.slf4j.LoggerFactory.getLogger;
21+
22+
import java.time.Instant;
23+
import java.util.Collections;
24+
import java.util.Date;
25+
import java.util.List;
26+
import java.util.Map;
27+
import java.util.stream.Collectors;
28+
29+
public class FullGcBin {
30+
private static final Logger LOG = getLogger(FullGcBin.class);
31+
private final DocumentStore documentStore;
32+
private boolean enabled;
33+
34+
public FullGcBin(DocumentStore ds) {
35+
documentStore = ds;
36+
}
37+
38+
public int remove(Map<String, Long> orphanOrDeletedRemovalMap) {
39+
if (orphanOrDeletedRemovalMap.isEmpty() || !addToBin(orphanOrDeletedRemovalMap)) {
40+
return 0;
41+
}
42+
43+
// use remove() with the modified check to rule
44+
// out any further race-condition where this removal
45+
// races with a un-orphan/re-creation as a result of which
46+
// the node should now not be removed. The modified check
47+
// ensures a node would then not be removed
48+
// (and as a result the removedSize != map.size())
49+
return documentStore.remove(Collection.NODES, orphanOrDeletedRemovalMap);
50+
}
51+
52+
public List<NodeDocument> findAndUpdate(List<UpdateOp> updateOpList) {
53+
LOG.info("Updating {} documents", updateOpList.size());
54+
if (updateOpList.isEmpty() || !addToBin(updateOpList)) {
55+
return Collections.emptyList();
56+
}
57+
return documentStore.findAndUpdate(Collection.NODES, updateOpList);
58+
}
59+
60+
private boolean addToBin(Map<String, Long> orphanOrDeletedRemovalMap) {
61+
if (!enabled) {
62+
LOG.info("Bin is disabled, skipping adding delete candidate documents to bin");
63+
return true;
64+
}
65+
LOG.info("Adding {} delete candidate documents to bin", orphanOrDeletedRemovalMap.size());
66+
List<UpdateOp> docs = orphanOrDeletedRemovalMap.keySet().stream()
67+
.map(e -> new UpdateOp(e, true))
68+
.map(this::insertOp)
69+
.collect(Collectors.toList());
70+
try {
71+
return documentStore.create(Collection.SETTINGS, docs);
72+
} catch (Exception e) {
73+
LOG.error("Error while adding delete candidate documents to bin", e);
74+
}
75+
return false;
76+
}
77+
78+
private boolean addToBin(List<UpdateOp> updateOpList) {
79+
if (!enabled) {
80+
LOG.info("Bin is disabled, skipping adding removed properties to bin");
81+
return true;
82+
}
83+
LOG.info("Adding {} removed properties to bin", updateOpList.size());
84+
List<UpdateOp> binOpList = updateOpList.stream().map(this::insertOp).collect(Collectors.toList());
85+
try {
86+
documentStore.createOrUpdate(Collection.SETTINGS, binOpList);
87+
return true;
88+
} catch (Exception e) {
89+
LOG.error("Error while adding removed properties to bin", e);
90+
}
91+
return false;
92+
}
93+
94+
/**
95+
* Create an insert operation from the given update operation
96+
*
97+
* @param op the update operation
98+
* @return the insert operation
99+
*/
100+
private UpdateOp insertOp(UpdateOp op) {
101+
UpdateOp insertOp = new UpdateOp("/bin/" + op.getId(), true);
102+
//copy removed properties to the new document
103+
op.getChanges().forEach((k, v) -> {
104+
if (v.type == UpdateOp.Operation.Type.REMOVE) {
105+
insertOp.set(k.getName(), "");
106+
}
107+
});
108+
//this property is used to track the time when the document was added to the bin
109+
//it can be used as a TTL index property to automatically remove the document after a certain time
110+
//see https://www.mongodb.com/docs/manual/core/index-ttl/#std-label-index-feature-ttl
111+
insertOp.set("_gcCollectedAt", Instant.now().toEpochMilli());
112+
return insertOp;
113+
}
114+
115+
public void setEnabled(boolean value) {
116+
this.enabled = value;
117+
}
118+
}

oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java

+6-9
Original file line numberDiff line numberDiff line change
@@ -1072,6 +1072,8 @@ private class FullGC implements Closeable {
10721072
/** small cache for classification of missing nodes : documents that do not exist vs deleted nodes */
10731073
private final LinkedHashMap<Path, Boolean> missingDocsTypes;
10741074

1075+
private final FullGcBin fullGcBin;
1076+
10751077
public FullGC(@NotNull RevisionVector headRevision, long toModifiedMs,
10761078
LinkedHashMap<Path, Boolean> missingDocsTypes, @NotNull GCMonitor monitor,
10771079
@NotNull AtomicBoolean cancel) {
@@ -1090,6 +1092,7 @@ public FullGC(@NotNull RevisionVector headRevision, long toModifiedMs,
10901092
// clusterId is not used
10911093
this.revisionForModified = Revision.newRevision(0);
10921094
this.root = nodeStore.getRoot(headRevision);
1095+
this.fullGcBin = new FullGcBin(ds);
10931096
}
10941097

10951098
public void collectGarbage(final NodeDocument doc, final GCPhases phases) {
@@ -1947,15 +1950,9 @@ public void removeGarbage(final VersionGCStats stats) {
19471950
}
19481951
if (!isFullGCDryRun) {
19491952
// only delete these in case it is not a dryRun
1950-
19511953
if (!orphanOrDeletedRemovalMap.isEmpty()) {
1952-
// use remove() with the modified check to rule
1953-
// out any further race-condition where this removal
1954-
// races with a un-orphan/re-creation as a result of which
1955-
// the node should now not be removed. The modified check
1956-
// ensures a node would then not be removed
1957-
// (and as a result the removedSize != map.size())
1958-
final int removedSize = ds.remove(NODES, orphanOrDeletedRemovalMap);
1954+
1955+
final int removedSize = fullGcBin.remove(orphanOrDeletedRemovalMap);
19591956
stats.updatedFullGCDocsCount += removedSize;
19601957
stats.deletedDocGCCount += removedSize;
19611958
stats.deletedOrphanNodesCount += removedSize;
@@ -1973,7 +1970,7 @@ public void removeGarbage(final VersionGCStats stats) {
19731970
}
19741971

19751972
if (!updateOpList.isEmpty()) {
1976-
List<NodeDocument> oldDocs = ds.findAndUpdate(NODES, updateOpList);
1973+
List<NodeDocument> oldDocs = fullGcBin.findAndUpdate(updateOpList);
19771974

19781975

19791976
int deletedProps = oldDocs.stream().filter(Objects::nonNull).mapToInt(d -> deletedPropsCountMap.getOrDefault(d.getId(), 0)).sum();

oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoVersionGCSupport.java

+2-3
Original file line numberDiff line numberDiff line change
@@ -251,9 +251,8 @@ private void logQueryExplain(String logMsg, @NotNull Bson query, Bson hint) {
251251
public Iterable<NodeDocument> getModifiedDocs(final long fromModified, final long toModified, final int limit,
252252
@NotNull final String fromId, @NotNull Set<String> includedPathPrefixes,
253253
@NotNull Set<String> excludedPathPrefixes) {
254-
LOG.info("getModifiedDocs fromModified: {}, toModified: {}, limit: {}, fromId: {}, includedPathPrefixes: {}, excludedPathPrefixes: {}",
255-
fromModified, toModified, limit, fromId, includedPathPrefixes, excludedPathPrefixes);
256-
254+
LOG.info("getModifiedDocs fromModified: {} ({}), toModified: {} ({}), limit: {}, fromId: {}, includedPathPrefixes: {}, excludedPathPrefixes: {}",
255+
fromModified, Utils.timestampToString(fromModified), toModified, Utils.timestampToString(toModified), limit, fromId, includedPathPrefixes, excludedPathPrefixes);
257256
final long fromModifiedQuery;
258257
if (MIN_ID_VALUE.equals(fromId)) {
259258
// If fromId is MIN_ID_VALUE, round fromModified to 5 second resolution

0 commit comments

Comments
 (0)