Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/changelog/142460.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
area: Searchable Snapshots
issues: []
pr: 142460
summary: Fix `SharedBytes` mmap leak by closing parent arena on close
type: bug
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import org.elasticsearch.nativeaccess.CloseableMappedByteBuffer;
import org.elasticsearch.nativeaccess.NativeAccess;

import java.io.Closeable;
import java.io.IOException;
import java.io.InputStream;
import java.io.RandomAccessFile;
Expand Down Expand Up @@ -75,6 +76,9 @@ public class SharedBytes extends AbstractRefCounted {

private final boolean mmap;

// parent mmap closeables whose arenas must be explicitly closed to unmap the shared cache file
private final Closeable[] mmapCloseables;

SharedBytes(int numRegions, int regionSize, NodeEnvironment environment, IntConsumer writeBytes, IntConsumer readBytes, boolean mmap)
throws IOException {
this.numRegions = numRegions;
Expand Down Expand Up @@ -102,28 +106,38 @@ public class SharedBytes extends AbstractRefCounted {
int mapSize = regionsPerMmap * regionSize;
int lastMapSize = Math.toIntExact(fileSize % mapSize);
int mapCount = Math.toIntExact(fileSize / mapSize) + (lastMapSize == 0 ? 0 : 1);
CloseableMappedByteBuffer[] mmaps = new CloseableMappedByteBuffer[mapCount];
CloseableMappedByteBuffer[] parentMmaps = new CloseableMappedByteBuffer[mapCount];
for (int i = 0; i < mapCount - 1; i++) {
mmaps[i] = map(fileChannel, MapMode.READ_ONLY, (long) mapSize * i, mapSize);
parentMmaps[i] = map(fileChannel, MapMode.READ_ONLY, (long) mapSize * i, mapSize);
}
mmaps[mapCount - 1] = map(
parentMmaps[mapCount - 1] = map(
fileChannel,
MapMode.READ_ONLY,
(long) mapSize * (mapCount - 1),
lastMapSize == 0 ? mapSize : lastMapSize
);
for (int i = 0; i < numRegions; i++) {
ios[i] = new IO(i, mmaps[i / regionsPerMmap].slice((long) (i % regionsPerMmap) * regionSize, regionSize));
ios[i] = new IO(i, parentMmaps[i / regionsPerMmap].slice((long) (i % regionsPerMmap) * regionSize, regionSize));
}
this.mmapCloseables = getMmapCloseables(parentMmaps);
} else {
for (int i = 0; i < numRegions; i++) {
ios[i] = new IO(i, null);
}
this.mmapCloseables = null;
}
this.writeBytes = writeBytes;
this.readBytes = readBytes;
}

private Closeable[] getMmapCloseables(CloseableMappedByteBuffer[] mappedByteBuffers) {
Closeable[] closeables = new Closeable[mappedByteBuffers.length];
for (int i = 0; i < mappedByteBuffers.length; i++) {
closeables[i] = mappedByteBuffers[i]::close;
}
return closeables;
}

/**
* Tries to find a suitable path to a searchable snapshots shared cache file in the data paths founds in the environment.
*
Expand Down Expand Up @@ -305,9 +319,17 @@ public static int readCacheFile(final IO fc, int channelPos, int relativePos, in
@Override
protected void closeInternal() {
try {
IOUtils.close(fileChannel, path == null ? null : () -> Files.deleteIfExists(path));
if (mmapCloseables != null) {
IOUtils.close(mmapCloseables);
}
} catch (IOException e) {
logger.warn("Failed to clean up shared bytes file", e);
logger.warn("Failed to unmap shared bytes", e);
} finally {
try {
IOUtils.close(fileChannel, path == null ? null : () -> Files.deleteIfExists(path));
} catch (IOException e) {
logger.warn("Failed to clean up shared bytes file", e);
}
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.greaterThanOrEqualTo;
import static org.hamcrest.Matchers.lessThan;

public class SharedBytesTests extends ESTestCase {

Expand Down Expand Up @@ -121,6 +122,64 @@ public void testCopyAllWith0Padding() throws Exception {
}
}

/**
* Best-effort test that mmap'd SharedBytes instances release their mapped memory on close, so
* that the OS can reclaim disk space immediately. Without proper unmapping, each iteration leaks
* the cache file's data blocks (the file is unlinked but the mapping holds the blocks allocated).
*/
public void testMmapResourcesReleasedOnClose() throws Exception {
assumeFalse("mmap not used on Windows", IOUtils.WINDOWS);

int regions = 4;
int regionSize = 1024 * 1024; // 1 MB per region
long cacheFileSize = (long) regions * regionSize; // 4 MB total
int iterations = 100;

var dataPath = createTempDir();
var nodeSettings = Settings.builder()
.put(Node.NODE_NAME_SETTING.getKey(), "node")
.put("path.home", createTempDir())
.putList(Environment.PATH_DATA_SETTING.getKey(), dataPath.toString())
.build();

try (var nodeEnv = new NodeEnvironment(nodeSettings, TestEnvironment.newEnvironment(nodeSettings))) {
var cachePath = nodeEnv.nodeDataPaths()[0].resolve("shared_snapshot_cache");

// Warm up: create and close once to stabilise filesystem state
new SharedBytes(regions, regionSize, nodeEnv, ignored -> {}, ignored -> {}, true).decRef();
assertFalse(Files.exists(cachePath));

long spaceBefore = Environment.getUsableSpace(nodeEnv.nodeDataPaths()[0]);

for (int i = 0; i < iterations; i++) {
SharedBytes sharedBytes = new SharedBytes(regions, regionSize, nodeEnv, ignored -> {}, ignored -> {}, true);
assertTrue("cache file should exist", Files.exists(cachePath));
sharedBytes.decRef();
assertFalse("cache file should be deleted after close", Files.exists(cachePath));
}

long spaceAfter = Environment.getUsableSpace(nodeEnv.nodeDataPaths()[0]);
long spaceLost = spaceBefore - spaceAfter;

// Without the fix, we'd lose ~iterations * cacheFileSize = 400 MB of unreclaimable space.
// With the fix, space is fully reclaimed. Allow a small margin for filesystem overhead and
// concurrent activity.
assertThat(
"Disk space was not reclaimed after closing "
+ iterations
+ " mmap'd SharedBytes instances. "
+ "Lost "
+ spaceLost
+ " bytes (cache file size: "
+ cacheFileSize
+ " bytes). "
+ "This may indicate that mmap buffers are not being properly unmapped on close.",
spaceLost,
lessThan(cacheFileSize * 3)
);
}
}

private static class TestSlowByteArrayInputStream extends ByteArrayInputStream {

private TestSlowByteArrayInputStream(byte[] data) {
Expand Down