diff --git a/core/trino-main/src/main/java/io/trino/testing/TestingNodeManager.java b/core/trino-main/src/main/java/io/trino/testing/TestingNodeManager.java index 965212f6046f..832f638032f3 100644 --- a/core/trino-main/src/main/java/io/trino/testing/TestingNodeManager.java +++ b/core/trino-main/src/main/java/io/trino/testing/TestingNodeManager.java @@ -25,6 +25,7 @@ import java.util.Set; import java.util.concurrent.CopyOnWriteArraySet; +import static com.google.common.collect.ImmutableSet.toImmutableSet; import static java.util.Objects.requireNonNull; public class TestingNodeManager @@ -35,15 +36,26 @@ public class TestingNodeManager private final String environment; private final Node localNode; private final Set nodes = new CopyOnWriteArraySet<>(); + private final boolean scheduleOnCoordinator; public TestingNodeManager() { this(TEST_ENVIRONMENT); } + public TestingNodeManager(boolean scheduleOnCoordinator) + { + this(TEST_ENVIRONMENT, scheduleOnCoordinator); + } + public TestingNodeManager(String environment) { - this(environment, new InternalNode("local", URI.create("local://127.0.0.1"), NodeVersion.UNKNOWN, true), ImmutableSet.of()); + this(environment, true); + } + + public TestingNodeManager(String environment, boolean scheduleOnCoordinator) + { + this(environment, new InternalNode("local", URI.create("local://127.0.0.1"), NodeVersion.UNKNOWN, true), ImmutableSet.of(), scheduleOnCoordinator); } public TestingNodeManager(Node localNode) @@ -58,13 +70,14 @@ public TestingNodeManager(List allNodes) public TestingNodeManager(Node localNode, Collection otherNodes) { - this(TEST_ENVIRONMENT, localNode, otherNodes); + this(TEST_ENVIRONMENT, localNode, otherNodes, true); } - public TestingNodeManager(String environment, Node localNode, Collection otherNodes) + public TestingNodeManager(String environment, Node localNode, Collection otherNodes, boolean scheduleOnCoordinator) { this.environment = environment; this.localNode = requireNonNull(localNode, "localNode is 
null"); + this.scheduleOnCoordinator = scheduleOnCoordinator; nodes.add(localNode); nodes.addAll(otherNodes); } @@ -74,6 +87,11 @@ public void addNode(Node node) nodes.add(node); } + public void removeNode(Node node) + { + nodes.remove(node); + } + @Override public Set getAllNodes() { @@ -83,7 +101,12 @@ public Set getAllNodes() @Override public Set getWorkerNodes() { - return nodes; + if (scheduleOnCoordinator) { + return nodes; + } + return nodes.stream() + .filter(node -> !node.isCoordinator()) + .collect(toImmutableSet()); } @Override diff --git a/lib/trino-filesystem-cache-alluxio/pom.xml b/lib/trino-filesystem-cache-alluxio/pom.xml new file mode 100644 index 000000000000..932515822dc3 --- /dev/null +++ b/lib/trino-filesystem-cache-alluxio/pom.xml @@ -0,0 +1,162 @@ + + + 4.0.0 + + + io.trino + trino-root + 438-SNAPSHOT + ../../pom.xml + + + trino-filesystem-cache-alluxio + Trino Filesystem - Alluxio + + + ${project.parent.basedir} + true + + + + + com.google.errorprone + error_prone_annotations + + + + com.google.guava + guava + + + com.google.code.findbugs + jsr305 + + + + + + com.google.inject + guice + + + + io.airlift + configuration + + + + io.airlift + stats + + + + io.airlift + units + + + + io.trino + trino-filesystem + + + + jakarta.validation + jakarta.validation-api + + + + org.alluxio + alluxio-core-client-fs + + + + org.alluxio + alluxio-core-common + + + + org.weakref + jmxutils + + + + io.airlift + slice + runtime + + + + io.trino + trino-spi + runtime + + + + io.airlift + log-manager + test + + + + io.trino + trino-client + test + + + + io.trino + trino-filesystem + ${project.version} + tests + test + + + + io.trino + trino-main + test + + + + io.trino + trino-testing + test + + + + io.trino.hive + hive-apache + test + + + + it.unimi.dsi + fastutil + test + + + + org.assertj + assertj-core + test + + + + org.junit.jupiter + junit-jupiter-api + test + + + + org.junit.jupiter + junit-jupiter-engine + test + + + + org.testcontainers + 
testcontainers + test + + + diff --git a/lib/trino-filesystem-cache-alluxio/src/main/java/io/trino/filesystem/alluxio/AlluxioCacheStats.java b/lib/trino-filesystem-cache-alluxio/src/main/java/io/trino/filesystem/alluxio/AlluxioCacheStats.java new file mode 100644 index 000000000000..5fda8e3eadd2 --- /dev/null +++ b/lib/trino-filesystem-cache-alluxio/src/main/java/io/trino/filesystem/alluxio/AlluxioCacheStats.java @@ -0,0 +1,54 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.filesystem.alluxio; + +import com.google.errorprone.annotations.ThreadSafe; +import io.airlift.stats.DistributionStat; +import io.trino.filesystem.Location; +import org.weakref.jmx.Managed; +import org.weakref.jmx.Nested; + +@ThreadSafe +public class AlluxioCacheStats + implements CacheStats +{ + private final DistributionStat externalReads = new DistributionStat(); + private final DistributionStat cacheReads = new DistributionStat(); + + @Managed + @Nested + public DistributionStat getExternalReads() + { + return externalReads; + } + + @Managed + @Nested + public DistributionStat getCacheReads() + { + return cacheReads; + } + + @Override + public void recordCacheRead(Location location, int length) + { + cacheReads.add(length); + } + + @Override + public void recordExternalRead(Location location, int length) + { + externalReads.add(length); + } +} diff --git 
a/lib/trino-filesystem-cache-alluxio/src/main/java/io/trino/filesystem/alluxio/AlluxioFileSystemCache.java b/lib/trino-filesystem-cache-alluxio/src/main/java/io/trino/filesystem/alluxio/AlluxioFileSystemCache.java new file mode 100644 index 000000000000..4ef8ded875e4 --- /dev/null +++ b/lib/trino-filesystem-cache-alluxio/src/main/java/io/trino/filesystem/alluxio/AlluxioFileSystemCache.java @@ -0,0 +1,82 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.filesystem.alluxio; + +import alluxio.client.file.CacheContext; +import alluxio.client.file.URIStatus; +import alluxio.client.file.cache.CacheManager; +import alluxio.conf.AlluxioConfiguration; +import alluxio.wire.FileInfo; +import com.google.common.annotations.VisibleForTesting; +import com.google.common.hash.HashFunction; +import com.google.common.hash.Hashing; +import com.google.inject.Inject; +import io.trino.filesystem.Location; +import io.trino.filesystem.TrinoInput; +import io.trino.filesystem.TrinoInputFile; +import io.trino.filesystem.TrinoInputStream; +import io.trino.filesystem.cache.TrinoFileSystemCache; + +import java.io.IOException; + +import static java.nio.charset.StandardCharsets.UTF_8; +import static java.util.Objects.requireNonNull; + +public class AlluxioFileSystemCache + implements TrinoFileSystemCache +{ + private final CacheManager cacheManager; + private final AlluxioConfiguration config; + private final CacheStats statistics; + private final HashFunction 
hashFunction = Hashing.murmur3_128(); + + @Inject + public AlluxioFileSystemCache(CacheManager cacheManager, AlluxioConfiguration config, CacheStats statistics) + { + this.cacheManager = requireNonNull(cacheManager, "cacheManager is null"); + this.config = requireNonNull(config, "config is null"); + this.statistics = requireNonNull(statistics, "statistics is null"); + } + + @Override + public TrinoInput cacheInput(TrinoInputFile delegate, String key) + throws IOException + { + return new AlluxioInput(delegate, uriStatus(delegate, key), cacheManager, config, statistics); + } + + @Override + public TrinoInputStream cacheStream(TrinoInputFile delegate, String key) + throws IOException + { + return new AlluxioInputStream(delegate, uriStatus(delegate, key), cacheManager, config, statistics); + } + + @Override + public void expire(Location source) + throws IOException + { + } + + @VisibleForTesting + protected URIStatus uriStatus(TrinoInputFile file, String key) + throws IOException + { + FileInfo info = new FileInfo() + .setPath(file.location().toString()) + .setLength(file.length()); + String cacheIdentifier = hashFunction.hashString(key, UTF_8).toString(); + return new URIStatus(info, CacheContext.defaults().setCacheIdentifier(cacheIdentifier)); + } +} diff --git a/lib/trino-filesystem-cache-alluxio/src/main/java/io/trino/filesystem/alluxio/AlluxioFileSystemCacheConfig.java b/lib/trino-filesystem-cache-alluxio/src/main/java/io/trino/filesystem/alluxio/AlluxioFileSystemCacheConfig.java new file mode 100644 index 000000000000..8ee0e464a288 --- /dev/null +++ b/lib/trino-filesystem-cache-alluxio/src/main/java/io/trino/filesystem/alluxio/AlluxioFileSystemCacheConfig.java @@ -0,0 +1,126 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.filesystem.alluxio; + +import com.google.common.base.Splitter; +import com.google.common.collect.ImmutableList; +import io.airlift.configuration.Config; +import io.airlift.configuration.ConfigDescription; +import io.airlift.units.DataSize; +import io.airlift.units.Duration; +import io.airlift.units.MaxDataSize; +import io.airlift.units.MinDataSize; +import io.airlift.units.MinDuration; +import jakarta.validation.constraints.Max; +import jakarta.validation.constraints.Min; +import jakarta.validation.constraints.NotNull; + +import java.util.List; +import java.util.Optional; + +import static com.google.common.base.MoreObjects.firstNonNull; +import static com.google.common.collect.ImmutableList.toImmutableList; + +public class AlluxioFileSystemCacheConfig +{ + private static final Splitter SPLITTER = Splitter.on(',').trimResults().omitEmptyStrings(); + + static final String CACHE_DIRECTORIES = "fs.cache.directories"; + static final String CACHE_MAX_SIZES = "fs.cache.max-sizes"; + static final String CACHE_MAX_PERCENTAGES = "fs.cache.max-disk-usage-percentages"; + + private List cacheDirectories; + private List maxCacheSizes = ImmutableList.of(); + private Optional cacheTTL = Optional.of(Duration.valueOf("7d")); + private List maxCacheDiskUsagePercentages = ImmutableList.of(); + private DataSize cachePageSize = DataSize.valueOf("1MB"); + + @NotNull + public List getCacheDirectories() + { + return cacheDirectories; + } + + @Config(CACHE_DIRECTORIES) + @ConfigDescription("Base directory to cache data. 
Use a comma-separated list to cache data in multiple directories.") + public AlluxioFileSystemCacheConfig setCacheDirectories(String cacheDirectories) + { + this.cacheDirectories = cacheDirectories == null ? null : SPLITTER.splitToList(cacheDirectories); + return this; + } + + public List getMaxCacheSizes() + { + return maxCacheSizes; + } + + @Config(CACHE_MAX_SIZES) + @ConfigDescription("The maximum cache size for a cache directory. Use a comma-separated list of sizes to specify allowed maximum values for each directory.") + public AlluxioFileSystemCacheConfig setMaxCacheSizes(String maxCacheSizes) + { + this.maxCacheSizes = SPLITTER.splitToStream(firstNonNull(maxCacheSizes, "")).map(DataSize::valueOf).collect(toImmutableList()); + return this; + } + + @NotNull + public Optional<@MinDuration("0s") Duration> getCacheTTL() + { + return cacheTTL; + } + + @Config("fs.cache.ttl") + @ConfigDescription("Duration to keep files in the cache prior to eviction") + public AlluxioFileSystemCacheConfig setCacheTTL(Duration cacheTTL) + { + this.cacheTTL = Optional.of(cacheTTL); + return this; + } + + public AlluxioFileSystemCacheConfig disableTTL() + { + this.cacheTTL = Optional.empty(); + return this; + } + + public List<@Min(0) @Max(100) Integer> getMaxCacheDiskUsagePercentages() + { + return maxCacheDiskUsagePercentages; + } + + @Config(CACHE_MAX_PERCENTAGES) + @ConfigDescription("The maximum percentage (0-100) of total disk size the cache can use. 
Use a comma-separated list of percentage values if supplying several cache directories.") + public AlluxioFileSystemCacheConfig setMaxCacheDiskUsagePercentages(String maxCacheDiskUsagePercentages) + { + this.maxCacheDiskUsagePercentages = SPLITTER.splitToStream(firstNonNull(maxCacheDiskUsagePercentages, "")) + .map(Integer::valueOf) + .collect(toImmutableList()); + return this; + } + + @NotNull + @MaxDataSize("15MB") + @MinDataSize("64kB") + public DataSize getCachePageSize() + { + return this.cachePageSize; + } + + @Config("fs.cache.alluxio.page-size") + @ConfigDescription("Page size of Alluxio cache") + public AlluxioFileSystemCacheConfig setCachePageSize(DataSize cachePageSize) + { + this.cachePageSize = cachePageSize; + return this; + } +} diff --git a/lib/trino-filesystem-cache-alluxio/src/main/java/io/trino/filesystem/alluxio/AlluxioFileSystemCacheModule.java b/lib/trino-filesystem-cache-alluxio/src/main/java/io/trino/filesystem/alluxio/AlluxioFileSystemCacheModule.java new file mode 100644 index 000000000000..03b0df71f796 --- /dev/null +++ b/lib/trino-filesystem-cache-alluxio/src/main/java/io/trino/filesystem/alluxio/AlluxioFileSystemCacheModule.java @@ -0,0 +1,153 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package io.trino.filesystem.alluxio; + +import alluxio.client.file.cache.CacheManager; +import alluxio.conf.AlluxioConfiguration; +import alluxio.conf.AlluxioProperties; +import alluxio.conf.InstancedConfiguration; +import alluxio.metrics.MetricsConfig; +import alluxio.metrics.MetricsSystem; +import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.ImmutableList; +import com.google.inject.Binder; +import com.google.inject.Provides; +import com.google.inject.Singleton; +import io.airlift.configuration.AbstractConfigurationAwareModule; +import io.airlift.units.DataSize; +import io.airlift.units.Duration; +import io.trino.filesystem.cache.CachingHostAddressProvider; +import io.trino.filesystem.cache.ConsistentHashingHostAddressProvider; +import io.trino.filesystem.cache.ConsistentHashingHostAddressProviderConfiguration; +import io.trino.filesystem.cache.TrinoFileSystemCache; + +import java.io.IOException; +import java.io.UncheckedIOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.List; +import java.util.Optional; +import java.util.Properties; +import java.util.concurrent.TimeUnit; + +import static alluxio.conf.PropertyKey.USER_CLIENT_CACHE_DIRS; +import static alluxio.conf.PropertyKey.USER_CLIENT_CACHE_ENABLED; +import static alluxio.conf.PropertyKey.USER_CLIENT_CACHE_PAGE_SIZE; +import static alluxio.conf.PropertyKey.USER_CLIENT_CACHE_SIZE; +import static alluxio.conf.PropertyKey.USER_CLIENT_CACHE_TTL_ENABLED; +import static alluxio.conf.PropertyKey.USER_CLIENT_CACHE_TTL_THRESHOLD_SECONDS; +import static com.google.common.base.Preconditions.checkArgument; +import static com.google.common.collect.ImmutableList.toImmutableList; +import static com.google.inject.Scopes.SINGLETON; +import static com.google.inject.multibindings.OptionalBinder.newOptionalBinder; +import static io.airlift.configuration.ConfigBinder.configBinder; +import static 
io.trino.filesystem.alluxio.AlluxioFileSystemCacheConfig.CACHE_DIRECTORIES; +import static io.trino.filesystem.alluxio.AlluxioFileSystemCacheConfig.CACHE_MAX_PERCENTAGES; +import static io.trino.filesystem.alluxio.AlluxioFileSystemCacheConfig.CACHE_MAX_SIZES; +import static java.lang.String.format; +import static java.lang.String.join; +import static org.weakref.jmx.guice.ExportBinder.newExporter; + +public class AlluxioFileSystemCacheModule + extends AbstractConfigurationAwareModule +{ + @Override + protected void setup(Binder binder) + { + configBinder(binder).bindConfig(AlluxioFileSystemCacheConfig.class); + configBinder(binder).bindConfig(ConsistentHashingHostAddressProviderConfiguration.class); + binder.bind(CacheStats.class).to(AlluxioCacheStats.class).in(SINGLETON); + newExporter(binder).export(CacheStats.class).as(generator -> generator.generatedNameOf(AlluxioCacheStats.class)); + + binder.bind(TrinoFileSystemCache.class).to(AlluxioFileSystemCache.class).in(SINGLETON); + newOptionalBinder(binder, CachingHostAddressProvider.class).setBinding().to(ConsistentHashingHostAddressProvider.class).in(SINGLETON); + + Properties metricProps = new Properties(); + metricProps.put("sink.jmx.class", "alluxio.metrics.sink.JmxSink"); + metricProps.put("sink.jmx.domain", "org.alluxio"); + MetricsSystem.startSinksFromConfig(new MetricsConfig(metricProps)); + } + + @Provides + @Singleton + public static AlluxioConfiguration getAlluxioConfiguration(AlluxioFileSystemCacheConfig config) + { + checkArgument(config.getMaxCacheSizes().isEmpty() ^ config.getMaxCacheDiskUsagePercentages().isEmpty(), + "Either %s or %s must be specified", CACHE_MAX_SIZES, CACHE_MAX_PERCENTAGES); + int size = config.getMaxCacheSizes().isEmpty() ? config.getMaxCacheDiskUsagePercentages().size() : config.getMaxCacheSizes().size(); + checkArgument(config.getCacheDirectories().size() == size, + "%s and %s must have the same size", CACHE_DIRECTORIES, config.getMaxCacheSizes().isEmpty() ? 
CACHE_MAX_PERCENTAGES : CACHE_MAX_SIZES); + config.getCacheDirectories().forEach(directory -> canWrite(Path.of(directory))); + List maxCacheSizes = config.getMaxCacheSizes().isEmpty() ? + calculateMaxCacheSizes(config.getMaxCacheDiskUsagePercentages(), config.getCacheDirectories().stream() + .map(directory -> totalSpace(Path.of(directory))).collect(toImmutableList())) + : config.getMaxCacheSizes(); + + AlluxioProperties alluxioProperties = new AlluxioProperties(); + alluxioProperties.set(USER_CLIENT_CACHE_ENABLED, true); + alluxioProperties.set(USER_CLIENT_CACHE_DIRS, join(",", config.getCacheDirectories())); + alluxioProperties.set(USER_CLIENT_CACHE_SIZE, join(",", maxCacheSizes.stream().map(DataSize::toBytesValueString).toList())); + alluxioProperties.set(USER_CLIENT_CACHE_PAGE_SIZE, config.getCachePageSize().toBytesValueString()); + Optional ttl = config.getCacheTTL(); + if (ttl.isPresent()) { + alluxioProperties.set(USER_CLIENT_CACHE_TTL_THRESHOLD_SECONDS, ttl.orElseThrow().roundTo(TimeUnit.SECONDS)); + alluxioProperties.set(USER_CLIENT_CACHE_TTL_ENABLED, true); + } + return new InstancedConfiguration(alluxioProperties); + } + + @Provides + @Singleton + public static CacheManager getCacheManager(AlluxioConfiguration alluxioConfiguration) + { + try { + return CacheManager.Factory.create(alluxioConfiguration); + } + catch (IOException e) { + throw new UncheckedIOException(e); + } + } + + private static List calculateMaxCacheSizes(List cachePercentages, List cacheDiskSizes) + { + ImmutableList.Builder maxCacheSizes = ImmutableList.builderWithExpectedSize(cacheDiskSizes.size()); + for (int i = 0; i < cacheDiskSizes.size(); i++) { + maxCacheSizes.add(DataSize.of(Math.round(cachePercentages.get(i) / 100.0 * cacheDiskSizes.get(i)), DataSize.Unit.BYTE)); + } + return maxCacheSizes.build(); + } + + private static void canWrite(Path path) + { + Path originalPath = path; + while (!Files.exists(path) && path.getParent() != null) { + path = path.getParent(); + } + 
checkArgument(Files.isDirectory(path), format("Cache directory %s is not a directory", path)); + checkArgument(Files.isReadable(path), format("Cannot read from cache directory %s", originalPath)); + checkArgument(Files.isWritable(path), format("Cannot write to cache directory %s", originalPath)); + } + + /** + * Get total space of the partition named by the path or its parent paths. + */ + @VisibleForTesting + static long totalSpace(Path path) + { + while (!Files.exists(path) && path.getParent() != null) { + path = path.getParent(); + } + return path.toFile().getTotalSpace(); + } +} diff --git a/lib/trino-filesystem-cache-alluxio/src/main/java/io/trino/filesystem/alluxio/AlluxioInput.java b/lib/trino-filesystem-cache-alluxio/src/main/java/io/trino/filesystem/alluxio/AlluxioInput.java new file mode 100644 index 000000000000..5dc1339509e2 --- /dev/null +++ b/lib/trino-filesystem-cache-alluxio/src/main/java/io/trino/filesystem/alluxio/AlluxioInput.java @@ -0,0 +1,126 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package io.trino.filesystem.alluxio; + +import alluxio.client.file.URIStatus; +import alluxio.client.file.cache.CacheManager; +import alluxio.conf.AlluxioConfiguration; +import io.trino.filesystem.TrinoInput; +import io.trino.filesystem.TrinoInputFile; + +import java.io.EOFException; +import java.io.IOException; + +import static java.lang.Math.min; +import static java.util.Objects.checkFromIndexSize; +import static java.util.Objects.requireNonNull; + +public class AlluxioInput + implements TrinoInput +{ + private final TrinoInputFile inputFile; + private final long fileLength; + private final CacheStats statistics; + private final AlluxioInputHelper helper; + + private TrinoInput input; + private boolean closed; + + public AlluxioInput( + TrinoInputFile inputFile, + URIStatus status, + CacheManager cacheManager, + AlluxioConfiguration configuration, + CacheStats statistics) + { + this.inputFile = requireNonNull(inputFile, "inputFile is null"); + this.fileLength = requireNonNull(status, "status is null").getLength(); + this.statistics = requireNonNull(statistics, "statistics is null"); + this.helper = new AlluxioInputHelper(inputFile.location(), status, cacheManager, configuration, statistics); + } + + @Override + public void readFully(long position, byte[] buffer, int offset, int length) + throws IOException + { + ensureOpen(); + checkFromIndexSize(offset, length, buffer.length); + if (position < 0) { + throw new IOException("Negative seek offset"); + } + if (length == 0) { + return; + } + + int bytesRead = helper.doCacheRead(position, buffer, offset, length); + if (length > bytesRead && position + bytesRead == fileLength) { + throw new EOFException("Read %s of %s requested bytes: %s".formatted(bytesRead, length, inputFile.location())); + } + doExternalRead(position + bytesRead, buffer, offset + bytesRead, length - bytesRead); + } + + private int doExternalRead(long position, byte[] buffer, int offset, int length) + throws IOException + { + if (length == 0) { 
+ return 0; + } + AlluxioInputHelper.PageAlignedRead aligned = helper.alignRead(position, length); + byte[] readBuffer = new byte[aligned.length()]; + getInput().readFully(aligned.pageStart(), readBuffer, 0, readBuffer.length); + helper.putCache(aligned.pageStart(), aligned.pageEnd(), readBuffer, aligned.length()); + System.arraycopy(readBuffer, aligned.pageOffset(), buffer, offset, length); + statistics.recordExternalRead(inputFile.location(), readBuffer.length); + return length; + } + + private TrinoInput getInput() + throws IOException + { + if (input == null) { + input = inputFile.newInput(); + } + return input; + } + + @Override + public int readTail(byte[] buffer, int bufferOffset, int bufferLength) + throws IOException + { + ensureOpen(); + checkFromIndexSize(bufferOffset, bufferLength, buffer.length); + + int readSize = (int) min(fileLength, bufferLength); + readFully(fileLength - readSize, buffer, bufferOffset, readSize); + return readSize; + } + + private void ensureOpen() + throws IOException + { + if (closed) { + throw new IOException("Stream closed: " + inputFile.location()); + } + } + + @Override + public void close() + throws IOException + { + closed = true; + if (input != null) { + input.close(); + } + } +} diff --git a/lib/trino-filesystem-cache-alluxio/src/main/java/io/trino/filesystem/alluxio/AlluxioInputHelper.java b/lib/trino-filesystem-cache-alluxio/src/main/java/io/trino/filesystem/alluxio/AlluxioInputHelper.java new file mode 100644 index 000000000000..d93ebc816ce4 --- /dev/null +++ b/lib/trino-filesystem-cache-alluxio/src/main/java/io/trino/filesystem/alluxio/AlluxioInputHelper.java @@ -0,0 +1,188 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.filesystem.alluxio; + +import alluxio.client.file.CacheContext; +import alluxio.client.file.URIStatus; +import alluxio.client.file.cache.CacheManager; +import alluxio.client.file.cache.PageId; +import alluxio.conf.AlluxioConfiguration; +import alluxio.conf.PropertyKey; +import com.google.common.primitives.Ints; +import io.trino.filesystem.Location; + +import java.io.IOException; +import java.nio.ByteBuffer; + +import static com.google.common.base.Preconditions.checkArgument; +import static java.lang.Integer.max; +import static java.lang.Math.addExact; +import static java.lang.Math.min; +import static java.util.Objects.requireNonNull; + +// Inspired by https://github.com/Alluxio/alluxio/blob/4e39eda0305a0042edaeae649b503b4508623619/dora/core/client/fs/src/main/java/alluxio/client/file/cache/LocalCacheFileInStream.java#L50 +// We implement a variant of this class to enable positioned reads +public class AlluxioInputHelper +{ + private final URIStatus status; + private final CacheManager cacheManager; + private final CacheStats statistics; + private final Location location; + private final int pageSize; + private final long fileLength; + private final int bufferSize; + private final byte[] readBuffer; + + // Tracks the start and end positions of the portion of the file in the buffer + private long bufferStartPosition; + private long bufferEndPosition; + + public AlluxioInputHelper(Location location, URIStatus status, CacheManager cacheManager, AlluxioConfiguration configuration, CacheStats statistics) + { + this.status = 
requireNonNull(status, "status is null"); + this.fileLength = status.getLength(); + this.cacheManager = requireNonNull(cacheManager, "cacheManager is null"); + this.pageSize = (int) requireNonNull(configuration, "configuration is null").getBytes(PropertyKey.USER_CLIENT_CACHE_PAGE_SIZE); + this.statistics = requireNonNull(statistics, "statistics is null"); + this.location = requireNonNull(location, "location is null"); + // Buffer to reduce the cost of doing page aligned reads for small sequential reads pattern + this.bufferSize = pageSize; + this.readBuffer = new byte[bufferSize]; + } + + public int doCacheRead(long position, byte[] bytes, int offset, int length) + throws IOException + { + int bytesRead = doBufferRead(position, bytes, offset, length); + return addExact(bytesRead, doInternalCacheRead(position + bytesRead, bytes, offset + bytesRead, length - bytesRead)); + } + + private int doBufferRead(long position, byte[] bytes, int offset, int length) + { + if (length == 0) { + return 0; + } + if (position < bufferStartPosition || position >= bufferEndPosition) { + return 0; + } + int bytesToCopy = min(length, Ints.saturatedCast(bufferEndPosition - position)); + System.arraycopy(readBuffer, Ints.saturatedCast(position - bufferStartPosition), bytes, offset, bytesToCopy); + return bytesToCopy; + } + + private int doInternalCacheRead(long position, byte[] bytes, int offset, int length) + throws IOException + { + // TODO: Support reading cache hits from the back as well + if (length == 0) { + return 0; + } + int remainingLength = length; + while (remainingLength > 0) { + int bytesReadFromCache = readPageFromCache(position, bytes, offset, remainingLength); + if (bytesReadFromCache == 0) { + break; + } + if (bytesReadFromCache < 0) { + throw new IOException("Read %d bytes from cache".formatted(bytesReadFromCache)); + } + position += bytesReadFromCache; + remainingLength -= bytesReadFromCache; + offset += bytesReadFromCache; + } + int bytesRead = length - 
remainingLength; + statistics.recordCacheRead(location, bytesRead); + return bytesRead; + } + + private int readPageFromCache(long position, byte[] buffer, int offset, int length) + { + long currentPage = position / pageSize; + int currentPageOffset = (int) (position % pageSize); + int bytesLeftInPage = (int) min(pageSize - currentPageOffset, fileLength - position); + int bytesToReadInPage = min(bytesLeftInPage, length); + if (bytesToReadInPage == 0) { + return 0; + } + CacheContext cacheContext = status.getCacheContext(); + PageId pageId = new PageId(cacheContext.getCacheIdentifier(), currentPage); + if (bytesLeftInPage > length && bufferSize > length) { // Read page into buffer + int putBytes = putBuffer(position, currentPageOffset, pageId, cacheContext); + if (putBytes <= 0) { + return putBytes; + } + return doBufferRead(position, buffer, offset, length); + } + else { + return cacheManager.get(pageId, currentPageOffset, bytesToReadInPage, buffer, offset, cacheContext); + } + } + + private int putBuffer(long position, int pageOffset, PageId pageId, CacheContext cacheContext) + { + pageOffset = min(pageOffset, max(pageSize - bufferSize, 0)); + int bytesToReadInPage = Ints.saturatedCast(min(pageSize - pageOffset, fileLength - position)); + int bytesRead = cacheManager.get(pageId, pageOffset, min(bytesToReadInPage, bufferSize), readBuffer, 0, cacheContext); + if (bytesRead < 0) { + // Buffer could be corrupted + bufferStartPosition = 0; + bufferEndPosition = 0; + return bytesRead; + } + if (bytesRead == 0) { + return bytesRead; + } + bufferStartPosition = pageOffset + (pageId.getPageIndex() * pageSize); + bufferEndPosition = bufferStartPosition + bytesRead; + return bytesRead; + } + + public record PageAlignedRead(long pageStart, long pageEnd, int pageOffset) + { + public int length() + { + return (int) (pageEnd - pageStart); + } + } + + public PageAlignedRead alignRead(long position, long length) + { + long pageStart = position - (position % pageSize); + int 
pageOffset = (int) (position % pageSize); + long readEnd = position + length; + long alignedReadEnd = readEnd + (pageSize - (readEnd % pageSize)) % pageSize; + long pageEnd = min(alignedReadEnd, fileLength); + return new PageAlignedRead(pageStart, pageEnd, pageOffset); + } + + // Put length bytes from readBuffer into cache between pageStart and pageEnd + public void putCache(long pageStart, long pageEnd, byte[] readBuffer, int length) + { + checkArgument(pageStart + length <= pageEnd); + long end = pageEnd; + if (pageStart + length < pageEnd) { + end = pageStart + length - (length % pageSize); + } + int offset = 0; + while (pageStart < end) { + long currentPage = pageStart / pageSize; + int currentPageSize = (int) min(pageSize, pageEnd - pageStart); + CacheContext cacheContext = status.getCacheContext(); + PageId pageId = new PageId(cacheContext.getCacheIdentifier(), currentPage); + cacheManager.put(pageId, ByteBuffer.wrap(readBuffer, offset, currentPageSize)); + pageStart += currentPageSize; + offset += pageSize; + } + } +} diff --git a/lib/trino-filesystem-cache-alluxio/src/main/java/io/trino/filesystem/alluxio/AlluxioInputStream.java b/lib/trino-filesystem-cache-alluxio/src/main/java/io/trino/filesystem/alluxio/AlluxioInputStream.java new file mode 100644 index 000000000000..aec2577536d4 --- /dev/null +++ b/lib/trino-filesystem-cache-alluxio/src/main/java/io/trino/filesystem/alluxio/AlluxioInputStream.java @@ -0,0 +1,205 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.filesystem.alluxio; + +import alluxio.client.file.URIStatus; +import alluxio.client.file.cache.CacheManager; +import alluxio.conf.AlluxioConfiguration; +import com.google.common.primitives.Longs; +import io.trino.filesystem.Location; +import io.trino.filesystem.TrinoInputFile; +import io.trino.filesystem.TrinoInputStream; + +import java.io.EOFException; +import java.io.IOException; + +import static com.google.common.primitives.Ints.saturatedCast; +import static java.lang.Integer.max; +import static java.lang.Math.addExact; +import static java.lang.Math.min; +import static java.lang.String.format; +import static java.util.Objects.checkFromIndexSize; +import static java.util.Objects.requireNonNull; + +public class AlluxioInputStream + extends TrinoInputStream +{ + private final TrinoInputFile inputFile; + private final long fileLength; + private final Location location; + private final CacheStats statistics; + private final AlluxioInputHelper helper; + + private TrinoInputStream externalStream; + private long position; + private boolean closed; + + public AlluxioInputStream(TrinoInputFile inputFile, URIStatus status, CacheManager cacheManager, AlluxioConfiguration configuration, CacheStats statistics) + { + this.inputFile = requireNonNull(inputFile, "inputFile is null"); + this.fileLength = requireNonNull(status, "status is null").getLength(); + this.location = inputFile.location(); + this.statistics = requireNonNull(statistics, "statistics is null"); + this.helper = new AlluxioInputHelper(inputFile.location(), status, cacheManager, configuration, statistics); + } + + @Override + public int available() + throws IOException + { + ensureOpen(); + + return saturatedCast(fileLength - position); + } + + @Override + public long getPosition() + { + return position; + } + + private void ensureOpen() + throws IOException + { + if (closed) { + throw 
new IOException("Output stream closed: " + location); + } + } + + @Override + public int read() + throws IOException + { + ensureOpen(); + + byte[] bytes = new byte[1]; + int n = read(bytes, 0, 1); + if (n == 1) { + // Converts the byte to an unsigned byte, an integer in the range 0 to 255 + return bytes[0] & 0xff; + } + if (n == -1) { + return -1; + } + throw new IOException(format("%d bytes read", n)); + } + + @Override + public int read(byte[] bytes, int offset, int length) + throws IOException + { + ensureOpen(); + + checkFromIndexSize(offset, length, bytes.length); + if (position >= fileLength) { + return -1; + } + int bytesRead = doRead(bytes, offset, length); + position += bytesRead; + return bytesRead; + } + + private int doRead(byte[] bytes, int offset, int length) + throws IOException + { + int bytesRead = helper.doCacheRead(position, bytes, offset, length); + return addExact(bytesRead, doExternalRead(position + bytesRead, bytes, offset + bytesRead, length - bytesRead)); + } + + private int doExternalRead(long readPosition, byte[] buffer, int offset, int length) + throws IOException + { + if (length == 0) { + return 0; + } + AlluxioInputHelper.PageAlignedRead aligned = helper.alignRead(readPosition, length); + if (externalStream == null) { + externalStream = inputFile.newStream(); + } + externalStream.seek(aligned.pageStart()); + byte[] readBuffer = new byte[aligned.length()]; + int externalBytesRead = externalStream.read(readBuffer, 0, aligned.length()); + if (externalBytesRead < 0) { + throw new IOException("Unexpected end of stream"); + } + helper.putCache(aligned.pageStart(), aligned.pageEnd(), readBuffer, externalBytesRead); + int bytesToCopy = min(length, max(externalBytesRead - aligned.pageOffset(), 0)); + System.arraycopy(readBuffer, aligned.pageOffset(), buffer, offset, bytesToCopy); + statistics.recordExternalRead(inputFile.location(), externalBytesRead); + return bytesToCopy; + } + + @Override + public long skip(long n) + throws IOException + { 
+ ensureOpen(); + + n = Longs.constrainToRange(n, 0, fileLength - position); + position += n; + return n; + } + + @Override + public void skipNBytes(long n) + throws IOException + { + ensureOpen(); + + if (n <= 0) { + return; + } + + long position; + try { + position = addExact(this.position, n); + } + catch (ArithmeticException e) { + throw new EOFException("Unable to skip %s bytes (position=%s, fileSize=%s): %s".formatted(n, this.position, fileLength, location)); + } + if (position > fileLength) { + throw new EOFException("Unable to skip %s bytes (position=%s, fileSize=%s): %s".formatted(n, this.position, fileLength, location)); + } + this.position = position; + } + + @Override + public void seek(long position) + throws IOException + { + ensureOpen(); + + if (position < 0) { + throw new IOException("Negative seek offset"); + } + if (position > fileLength) { + throw new IOException("Cannot seek to %s. File size is %s: %s".formatted(position, fileLength, location)); + } + + this.position = position; + } + + @Override + public void close() + throws IOException + { + if (!closed) { + closed = true; + if (externalStream != null) { + externalStream.close(); + externalStream = null; + } + } + } +} diff --git a/lib/trino-filesystem-cache-alluxio/src/main/java/io/trino/filesystem/alluxio/CacheStats.java b/lib/trino-filesystem-cache-alluxio/src/main/java/io/trino/filesystem/alluxio/CacheStats.java new file mode 100644 index 000000000000..16fb9a3bf5b9 --- /dev/null +++ b/lib/trino-filesystem-cache-alluxio/src/main/java/io/trino/filesystem/alluxio/CacheStats.java @@ -0,0 +1,23 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.filesystem.alluxio; + +import io.trino.filesystem.Location; + +public interface CacheStats +{ + void recordCacheRead(Location location, int length); + + void recordExternalRead(Location location, int length); +} diff --git a/lib/trino-filesystem-cache-alluxio/src/test/java/io/trino/filesystem/alluxio/TestAlluxioCacheFileSystem.java b/lib/trino-filesystem-cache-alluxio/src/test/java/io/trino/filesystem/alluxio/TestAlluxioCacheFileSystem.java new file mode 100644 index 000000000000..f5444d17ffcd --- /dev/null +++ b/lib/trino-filesystem-cache-alluxio/src/test/java/io/trino/filesystem/alluxio/TestAlluxioCacheFileSystem.java @@ -0,0 +1,135 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package io.trino.filesystem.alluxio; + +import alluxio.conf.AlluxioConfiguration; +import io.airlift.units.DataSize; +import io.trino.filesystem.AbstractTestTrinoFileSystem; +import io.trino.filesystem.Location; +import io.trino.filesystem.TrinoFileSystem; +import io.trino.filesystem.cache.CacheFileSystem; +import io.trino.filesystem.cache.DefaultCacheKeyProvider; +import io.trino.filesystem.memory.MemoryFileSystem; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeAll; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Comparator; +import java.util.Iterator; +import java.util.stream.Stream; + +import static org.assertj.core.api.Assertions.assertThat; + +public class TestAlluxioCacheFileSystem + extends AbstractTestTrinoFileSystem +{ + private CacheFileSystem fileSystem; + private Path tempDirectory; + private TestingAlluxioFileSystemCache cache; + private MemoryFileSystem memoryFileSystem; + + @BeforeAll + void beforeAll() + throws IOException + { + tempDirectory = Files.createTempDirectory("test"); + Path cacheDirectory = tempDirectory.resolve("cache"); + Files.createDirectory(cacheDirectory); + AlluxioFileSystemCacheConfig configuration = new AlluxioFileSystemCacheConfig() + .setCacheDirectories(cacheDirectory.toAbsolutePath().toString()) + .setCachePageSize(DataSize.valueOf("32003B")) + .disableTTL() + .setMaxCacheSizes("100MB"); + AlluxioConfiguration alluxioConfiguration = AlluxioFileSystemCacheModule.getAlluxioConfiguration(configuration); + cache = new TestingAlluxioFileSystemCache(alluxioConfiguration, new DefaultCacheKeyProvider()) { + @Override + public void expire(Location location) + { + // Expire the entire cache on a single invalidation + clear(); + } + }; + memoryFileSystem = new MemoryFileSystem(); + fileSystem = new CacheFileSystem(memoryFileSystem, cache, cache.getCacheKeyProvider()); + } + + @AfterEach + void 
afterEach() + throws IOException + { + cache.clear(); + } + + @AfterAll + void afterAll() + throws IOException + { + cleanupFiles(tempDirectory); + Files.delete(tempDirectory); + } + + private void cleanupFiles(Path directory) + throws IOException + { + // tests will leave directories + try (Stream walk = Files.walk(directory)) { + Iterator iterator = walk.sorted(Comparator.reverseOrder()).iterator(); + while (iterator.hasNext()) { + Path path = iterator.next(); + if (!path.equals(directory)) { + Files.delete(path); + } + } + } + } + + @Override + protected boolean isHierarchical() + { + return false; + } + + @Override + protected boolean isFileContentCaching() + { + return true; + } + + @Override + protected boolean supportsCreateExclusive() + { + return true; + } + + @Override + protected TrinoFileSystem getFileSystem() + { + return fileSystem; + } + + @Override + protected Location getRootLocation() + { + return Location.of("memory://"); + } + + @Override + protected void verifyFileSystemIsEmpty() + { + assertThat(memoryFileSystem.isEmpty()).isTrue(); + } +} diff --git a/lib/trino-filesystem-cache-alluxio/src/test/java/io/trino/filesystem/alluxio/TestAlluxioCacheFileSystemAccessOperations.java b/lib/trino-filesystem-cache-alluxio/src/test/java/io/trino/filesystem/alluxio/TestAlluxioCacheFileSystemAccessOperations.java new file mode 100644 index 000000000000..f6b7602ce895 --- /dev/null +++ b/lib/trino-filesystem-cache-alluxio/src/test/java/io/trino/filesystem/alluxio/TestAlluxioCacheFileSystemAccessOperations.java @@ -0,0 +1,377 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.filesystem.alluxio; + +import alluxio.conf.AlluxioConfiguration; +import com.google.common.collect.HashMultiset; +import com.google.common.collect.ImmutableMultiset; +import com.google.common.collect.Multiset; +import io.airlift.slice.Slices; +import io.airlift.units.DataSize; +import io.trino.filesystem.Location; +import io.trino.filesystem.TrackingFileSystemFactory; +import io.trino.filesystem.TrackingFileSystemFactory.OperationType; +import io.trino.filesystem.TrinoInput; +import io.trino.filesystem.TrinoInputFile; +import io.trino.filesystem.cache.CacheFileSystem; +import io.trino.filesystem.cache.DefaultCacheKeyProvider; +import io.trino.filesystem.memory.MemoryFileSystemFactory; +import io.trino.spi.security.ConnectorIdentity; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestInstance; +import org.junit.jupiter.api.TestInstance.Lifecycle; + +import java.io.IOException; +import java.io.OutputStream; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Arrays; + +import static io.trino.filesystem.TrackingFileSystemFactory.OperationType.INPUT_FILE_GET_LENGTH; +import static io.trino.filesystem.TrackingFileSystemFactory.OperationType.INPUT_FILE_LAST_MODIFIED; +import static io.trino.filesystem.TrackingFileSystemFactory.OperationType.INPUT_FILE_NEW_STREAM; +import static io.trino.filesystem.alluxio.TestingAlluxioFileSystemCache.OperationType.CACHE_READ; +import 
static io.trino.filesystem.alluxio.TestingAlluxioFileSystemCache.OperationType.EXTERNAL_READ; +import static io.trino.testing.MultisetAssertions.assertMultisetsEqual; +import static java.util.Collections.nCopies; +import static java.util.stream.Collectors.toCollection; +import static org.assertj.core.api.Assertions.assertThat; + +@TestInstance(Lifecycle.PER_CLASS) +public class TestAlluxioCacheFileSystemAccessOperations +{ + private static final int CACHE_SIZE = 1024; + private static final int PAGE_SIZE = 128; + + private TrackingFileSystemFactory trackingFileSystemFactory; + private TestingAlluxioFileSystemCache alluxioCache; + private CacheFileSystem fileSystem; + private Path tempDirectory; + + @BeforeAll + public void setUp() + throws IOException + { + tempDirectory = Files.createTempDirectory("test"); + Path cacheDirectory = Files.createDirectory(tempDirectory.resolve("cache")); + + AlluxioFileSystemCacheConfig configuration = new AlluxioFileSystemCacheConfig() + .setCacheDirectories(cacheDirectory.toAbsolutePath().toString()) + .disableTTL() + .setCachePageSize(DataSize.ofBytes(PAGE_SIZE)) + .setMaxCacheSizes(DataSize.ofBytes(CACHE_SIZE).toBytesValueString()); + AlluxioConfiguration alluxioConfiguration = AlluxioFileSystemCacheModule.getAlluxioConfiguration(configuration); + + trackingFileSystemFactory = new TrackingFileSystemFactory(new MemoryFileSystemFactory()); + alluxioCache = new TestingAlluxioFileSystemCache(alluxioConfiguration, new DefaultCacheKeyProvider()); + fileSystem = new CacheFileSystem(trackingFileSystemFactory.create(ConnectorIdentity.ofUser("hello")), + alluxioCache, alluxioCache.getCacheKeyProvider()); + } + + @AfterAll + public void tearDown() + { + trackingFileSystemFactory = null; + fileSystem = null; + tempDirectory.toFile().delete(); + tempDirectory = null; + } + + @Test + public void testCache() + throws IOException + { + Location location = getRootLocation().appendPath("hello"); + byte[] content = "hello 
world".getBytes(StandardCharsets.UTF_8); + try (OutputStream output = fileSystem.newOutputFile(location).create()) { + output.write(content); + } + + assertReadOperations(location, content, + ImmutableMultiset.builder() + .add(new FileOperation(location, INPUT_FILE_NEW_STREAM)) + .add(new FileOperation(location, INPUT_FILE_GET_LENGTH)) + .add(new FileOperation(location, INPUT_FILE_LAST_MODIFIED)) + .build(), + ImmutableMultiset.builder() + .add(new CacheOperation(location, EXTERNAL_READ)) + .build()); + assertReadOperations(location, content, + ImmutableMultiset.builder() + .add(new FileOperation(location, INPUT_FILE_GET_LENGTH)) + .add(new FileOperation(location, INPUT_FILE_LAST_MODIFIED)) + .build(), + ImmutableMultiset.builder() + .add(new CacheOperation(location, CACHE_READ)) + .build()); + + byte[] modifiedContent = "modified content".getBytes(StandardCharsets.UTF_8); + try (OutputStream output = fileSystem.newOutputFile(location).createOrOverwrite()) { + output.write(modifiedContent); + } + + // Clear the cache, as lastModified time might be unchanged + alluxioCache.clear(); + assertReadOperations(location, modifiedContent, + ImmutableMultiset.builder() + .add(new FileOperation(location, INPUT_FILE_NEW_STREAM)) + .add(new FileOperation(location, INPUT_FILE_GET_LENGTH)) + .add(new FileOperation(location, INPUT_FILE_LAST_MODIFIED)) + .build(), + ImmutableMultiset.builder() + .add(new CacheOperation(location, EXTERNAL_READ)) + .build()); + } + + @Test + public void testPartialCacheHits() + throws IOException + { + Location location = getRootLocation().appendPath("partial"); + byte[] content = new byte[2 * PAGE_SIZE]; + for (int i = 0; i < content.length; i++) { + content[i] = (byte) i; + } + try (OutputStream output = fileSystem.newOutputFile(location).create()) { + output.write(content); + } + + assertSizedReadOperations(location, Arrays.copyOf(content, PAGE_SIZE), + ImmutableMultiset.builder() + .add(new FileOperation(location, INPUT_FILE_NEW_STREAM)) + 
.add(new FileOperation(location, INPUT_FILE_GET_LENGTH)) + .add(new FileOperation(location, INPUT_FILE_LAST_MODIFIED)) + .build(), + ImmutableMultiset.builder() + .add(new CacheReadOperation(location, EXTERNAL_READ, 128)) + .add(new CacheReadOperation(location, CACHE_READ, 0)) + .build()); + + assertSizedReadOperations(location, Arrays.copyOf(content, PAGE_SIZE + 10), + ImmutableMultiset.builder() + .add(new FileOperation(location, INPUT_FILE_GET_LENGTH)) + .add(new FileOperation(location, INPUT_FILE_NEW_STREAM)) + .add(new FileOperation(location, INPUT_FILE_LAST_MODIFIED)) + .build(), + ImmutableMultiset.builder() + .add(new CacheReadOperation(location, EXTERNAL_READ, 128)) + .add(new CacheReadOperation(location, CACHE_READ, 128)) + .build()); + + assertSizedReadOperations(location, Arrays.copyOf(content, PAGE_SIZE + 10), + ImmutableMultiset.builder() + .add(new FileOperation(location, INPUT_FILE_GET_LENGTH)) + .add(new FileOperation(location, INPUT_FILE_LAST_MODIFIED)) + .build(), + ImmutableMultiset.builder() + .add(new CacheReadOperation(location, CACHE_READ, 138)) + .build()); + + assertSizedReadOperations(location, content, + ImmutableMultiset.builder() + .add(new FileOperation(location, INPUT_FILE_GET_LENGTH)) + .add(new FileOperation(location, INPUT_FILE_LAST_MODIFIED)) + .build(), + ImmutableMultiset.builder() + .add(new CacheReadOperation(location, CACHE_READ, 256)) + .build()); + } + + @Test + public void testMultiPageExternalsReads() + throws IOException + { + Location location = getRootLocation().appendPath("multipage"); + byte[] content = new byte[2 * PAGE_SIZE]; + for (int i = 0; i < content.length; i++) { + content[i] = (byte) i; + } + try (OutputStream output = fileSystem.newOutputFile(location).create()) { + output.write(content); + } + + assertSizedReadOperations(location, Arrays.copyOf(content, PAGE_SIZE + 1), + ImmutableMultiset.builder() + .add(new FileOperation(location, INPUT_FILE_NEW_STREAM)) + .add(new FileOperation(location, 
INPUT_FILE_GET_LENGTH)) + .add(new FileOperation(location, INPUT_FILE_LAST_MODIFIED)) + .build(), + ImmutableMultiset.builder() + .add(new CacheReadOperation(location, EXTERNAL_READ, 256)) + .add(new CacheReadOperation(location, CACHE_READ, 0)) + .build()); + + assertSizedReadOperations(location, Arrays.copyOf(content, 2 * PAGE_SIZE), + ImmutableMultiset.builder() + .add(new FileOperation(location, INPUT_FILE_GET_LENGTH)) + .add(new FileOperation(location, INPUT_FILE_LAST_MODIFIED)) + .build(), + ImmutableMultiset.builder() + .add(new CacheReadOperation(location, CACHE_READ, 256)) + .build()); + } + + @Test + public void testCacheInvalidation() + throws IOException + { + int cacheSize = (int) (0.9 * CACHE_SIZE); + Location aLocation = createFile("a", cacheSize); + Location bLocation = createFile("b", cacheSize); + Location cLocation = createFile("c", cacheSize / 2); + Location dLocation = createFile("d", cacheSize / 2); + + assertUnCachedRead(aLocation); + assertCachedRead(aLocation); + assertUnCachedRead(bLocation); + assertUnCachedRead(aLocation); + assertCachedRead(aLocation); + assertCachedRead(aLocation); + assertUnCachedRead(bLocation); + assertCachedRead(bLocation); + + assertUnCachedRead(cLocation); + assertUnCachedRead(dLocation); + assertCachedRead(cLocation); + assertCachedRead(dLocation); + + assertUnCachedRead(bLocation); + assertCachedRead(bLocation); + assertUnCachedRead(cLocation); + assertUnCachedRead(dLocation); + } + + private Location createFile(String name, int size) + throws IOException + { + Location location = getRootLocation().appendPath(name); + try (OutputStream output = fileSystem.newOutputFile(location).create()) { + output.write("a".repeat(size).getBytes(StandardCharsets.UTF_8)); + } + return location; + } + + private Location getRootLocation() + { + return Location.of("memory://"); + } + + private void assertCachedRead(Location location) + throws IOException + { + assertReadOperations(location, + ImmutableMultiset.builder() + .add(new 
FileOperation(location, INPUT_FILE_GET_LENGTH)) + .add(new FileOperation(location, INPUT_FILE_LAST_MODIFIED)) + .build(), + ImmutableMultiset.builder() + .add(new CacheOperation(location, CACHE_READ)) + .build()); + } + + private void assertUnCachedRead(Location location) + throws IOException + { + assertReadOperations(location, + ImmutableMultiset.builder() + .add(new FileOperation(location, INPUT_FILE_NEW_STREAM)) + .add(new FileOperation(location, INPUT_FILE_GET_LENGTH)) + .add(new FileOperation(location, INPUT_FILE_LAST_MODIFIED)) + .build(), + ImmutableMultiset.builder() + .add(new CacheOperation(location, EXTERNAL_READ)) + .build()); + } + + private void assertReadOperations(Location location, Multiset fileOperations, Multiset cacheOperations) + throws IOException + { + TrinoInputFile file = fileSystem.newInputFile(location); + int length = (int) file.length(); + trackingFileSystemFactory.reset(); + alluxioCache.reset(); + try (TrinoInput input = file.newInput()) { + input.readFully(0, length); + } + assertMultisetsEqual(getOperations(), fileOperations); + assertMultisetsEqual(getCacheOperations(), cacheOperations); + } + + private void assertReadOperations(Location location, byte[] content, Multiset fileOperations, Multiset cacheOperations) + throws IOException + { + TrinoInputFile file = fileSystem.newInputFile(location); + int length = content.length; //saturatedCast(file.length()); + trackingFileSystemFactory.reset(); + alluxioCache.reset(); + try (TrinoInput input = file.newInput()) { + assertThat(input.readFully(0, length)).isEqualTo(Slices.wrappedBuffer(content)); + } + assertMultisetsEqual(getOperations(), fileOperations); + assertMultisetsEqual(getCacheOperations(), cacheOperations); + } + + private void assertSizedReadOperations(Location location, byte[] content, Multiset fileOperations, Multiset cacheOperations) + throws IOException + { + TrinoInputFile file = fileSystem.newInputFile(location); + int length = content.length; 
//saturatedCast(file.length()); + trackingFileSystemFactory.reset(); + alluxioCache.reset(); + try (TrinoInput input = file.newInput()) { + assertThat(input.readFully(0, length)).isEqualTo(Slices.wrappedBuffer(content)); + } + assertMultisetsEqual(getOperations(), fileOperations); + assertMultisetsEqual(getCacheReadOperations(), cacheOperations); + } + + private Multiset getOperations() + { + return trackingFileSystemFactory.getOperationCounts() + .entrySet().stream() + .flatMap(entry -> nCopies(entry.getValue(), new FileOperation( + entry.getKey().location(), + entry.getKey().operationType())).stream()) + .collect(toCollection(HashMultiset::create)); + } + + private Multiset getCacheOperations() + { + return alluxioCache.getOperationCounts() + .entrySet().stream() + .flatMap(entry -> nCopies((int) entry.getValue().stream().filter(l -> l > 0).count(), new CacheOperation( + entry.getKey().location(), + entry.getKey().type())).stream()) + .collect(toCollection(HashMultiset::create)); + } + + private Multiset getCacheReadOperations() + { + return alluxioCache.getOperationCounts() + .entrySet().stream() + .flatMap(entry -> entry.getValue().stream().map(length -> new CacheReadOperation( + entry.getKey().location(), + entry.getKey().type(), length))) + .collect(toCollection(HashMultiset::create)); + } + + private record FileOperation(Location path, OperationType operationType) {} + + private record CacheOperation(Location path, TestingAlluxioFileSystemCache.OperationType operationType) {} + + private record CacheReadOperation(Location path, TestingAlluxioFileSystemCache.OperationType operationType, int length) {} +} diff --git a/lib/trino-filesystem-cache-alluxio/src/test/java/io/trino/filesystem/alluxio/TestAlluxioFileSystemCacheConfig.java b/lib/trino-filesystem-cache-alluxio/src/test/java/io/trino/filesystem/alluxio/TestAlluxioFileSystemCacheConfig.java new file mode 100644 index 000000000000..e3ff602d3be2 --- /dev/null +++ 
b/lib/trino-filesystem-cache-alluxio/src/test/java/io/trino/filesystem/alluxio/TestAlluxioFileSystemCacheConfig.java @@ -0,0 +1,106 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.filesystem.alluxio; + +import com.google.common.collect.ImmutableMap; +import io.airlift.units.DataSize; +import io.airlift.units.Duration; +import org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Map; + +import static io.airlift.configuration.testing.ConfigAssertions.assertFullMapping; +import static io.airlift.configuration.testing.ConfigAssertions.assertRecordedDefaults; +import static io.airlift.configuration.testing.ConfigAssertions.recordDefaults; +import static io.trino.filesystem.alluxio.AlluxioFileSystemCacheModule.totalSpace; +import static org.assertj.core.api.Assertions.assertThatThrownBy; +import static org.junit.jupiter.api.Assertions.assertEquals; + +class TestAlluxioFileSystemCacheConfig +{ + @Test + public void testInvalidConfiguration() + { + assertThatThrownBy(() -> + AlluxioFileSystemCacheModule.getAlluxioConfiguration( + new AlluxioFileSystemCacheConfig() + .setCacheDirectories("/cache1,/cache2") + .setMaxCacheDiskUsagePercentages("0") + .setMaxCacheSizes("1B"))) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("Either fs.cache.max-sizes or fs.cache.max-disk-usage-percentages must be specified"); + assertThatThrownBy(() -> + 
AlluxioFileSystemCacheModule.getAlluxioConfiguration( + new AlluxioFileSystemCacheConfig() + .setCacheDirectories("/cache1,/cache2") + .setMaxCacheSizes("1B"))) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("fs.cache.directories and fs.cache.max-sizes must have the same size"); + assertThatThrownBy(() -> + AlluxioFileSystemCacheModule.getAlluxioConfiguration( + new AlluxioFileSystemCacheConfig() + .setCacheDirectories("/cache1,/cache2") + .setMaxCacheDiskUsagePercentages("0"))) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("fs.cache.directories and fs.cache.max-disk-usage-percentages must have the same size"); + } + + @Test + void testDefaults() + { + assertRecordedDefaults(recordDefaults(AlluxioFileSystemCacheConfig.class) + .setCacheDirectories(null) + .setCachePageSize(DataSize.valueOf("1MB")) + .setMaxCacheSizes(null) + .setMaxCacheDiskUsagePercentages(null) + .setCacheTTL(Duration.valueOf("7d"))); + } + + @Test + public void testExplicitPropertyMappings() + throws IOException + { + Path cacheDirectory = Files.createTempFile(null, null); + + Map properties = ImmutableMap.builder() + .put("fs.cache.directories", cacheDirectory.toString()) + .put("fs.cache.alluxio.page-size", "7MB") + .put("fs.cache.max-sizes", "1GB") + .put("fs.cache.max-disk-usage-percentages", "50") + .put("fs.cache.ttl", "1d") + .buildOrThrow(); + + AlluxioFileSystemCacheConfig expected = new AlluxioFileSystemCacheConfig() + .setCacheDirectories(cacheDirectory.toString()) + .setCachePageSize(DataSize.valueOf("7MB")) + .setMaxCacheSizes("1GB") + .setMaxCacheDiskUsagePercentages("50") + .setCacheTTL(Duration.valueOf("1d")); + + assertFullMapping(properties, expected); + } + + @Test + public void testTotalSpaceCalculation() + throws IOException + { + Path cacheDirectory = Files.createTempFile(null, null); + + assertEquals(cacheDirectory.toFile().getTotalSpace(), totalSpace(cacheDirectory)); + 
assertEquals(cacheDirectory.toFile().getTotalSpace(), totalSpace(cacheDirectory.resolve(Path.of("does-not-exist")))); + } +} diff --git a/lib/trino-filesystem-cache-alluxio/src/test/java/io/trino/filesystem/alluxio/TestFuzzAlluxioCacheFileSystem.java b/lib/trino-filesystem-cache-alluxio/src/test/java/io/trino/filesystem/alluxio/TestFuzzAlluxioCacheFileSystem.java new file mode 100644 index 000000000000..7c617f68e50a --- /dev/null +++ b/lib/trino-filesystem-cache-alluxio/src/test/java/io/trino/filesystem/alluxio/TestFuzzAlluxioCacheFileSystem.java @@ -0,0 +1,206 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package io.trino.filesystem.alluxio; + +import alluxio.conf.AlluxioConfiguration; +import io.airlift.slice.Slice; +import io.airlift.slice.Slices; +import io.airlift.units.DataSize; +import io.trino.filesystem.Location; +import io.trino.filesystem.TrinoFileSystem; +import io.trino.filesystem.TrinoInput; +import io.trino.filesystem.cache.CacheFileSystem; +import io.trino.filesystem.cache.DefaultCacheKeyProvider; +import io.trino.filesystem.memory.MemoryFileSystemFactory; +import io.trino.spi.security.ConnectorIdentity; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestInstance; +import org.junit.jupiter.api.TestInstance.Lifecycle; + +import java.io.Closeable; +import java.io.IOException; +import java.io.OutputStream; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Random; + +import static java.lang.Math.min; +import static org.junit.jupiter.api.Assertions.assertEquals; + +@TestInstance(Lifecycle.PER_METHOD) +public class TestFuzzAlluxioCacheFileSystem +{ + private static final int CACHE_SIZE = 8 * 1024; + private static final int PAGE_SIZE = 128; + + @Test + public void testFuzzTrinoInputReadFully() + throws IOException + { + fuzzTrinoInputOperation((fs, l) -> fs.newInputFile(l).newInput(), TrinoInput::readFully); + } + + @Test + public void testFuzzTrinoInputReadTail() + throws IOException + { + fuzzTrinoInputOperation((fs, l) -> fs.newInputFile(l).newInput(), (input, position, buffer, bufferOffset, bufferLength) -> input.readTail(buffer, bufferOffset, bufferLength)); + } + + @Test + public void testFuzzTrinoInputStreamRead() + throws IOException + { + fuzzTrinoInputOperation((fs, l) -> fs.newInputFile(l).newStream(), (input, position, buffer, bufferOffset, bufferLength) -> { + input.seek(position); + input.read(buffer, bufferOffset, bufferLength); + }); + } + + private void fuzzTrinoInputOperation(CreateTrinoInput createInput, TrinoInputOperation operation) + throws IOException + { + Random random = new 
Random(); + try (TestFileSystem expectedFileSystemState = new TestMemoryFileSystem()) { + try (TestFileSystem actualFileSystemState = new TestAlluxioFileSystem()) { + TrinoFileSystem expectedFileSystem = expectedFileSystemState.create(); + TrinoFileSystem testFileSystem = actualFileSystemState.create(); + + Location expectedLocation = expectedFileSystemState.tempLocation(); + Location testLocation = actualFileSystemState.tempLocation(); + + int fileSize = random.nextInt(0, CACHE_SIZE / 2); + + createTestFile(expectedFileSystem, expectedLocation, fileSize); + createTestFile(testFileSystem, testLocation, fileSize); + + T expectedInput = createInput.apply(expectedFileSystem, expectedLocation); + T actualInput = createInput.apply(testFileSystem, testLocation); + + for (int i = 0; i < 1000; i++) { + applyOperation(random, fileSize, expectedInput, actualInput, operation); + } + } + } + } + + public void applyOperation(Random random, int fileSize, T expectedInput, T actualInput, TrinoInputOperation operation) + throws IOException + { + long position = random.nextLong(0, fileSize + 1); + int bufferSize = random.nextInt(0, fileSize + 1); + int bufferOffset = random.nextInt(0, bufferSize + 1); + int length = bufferSize - bufferOffset; + byte[] bufferExpected = new byte[bufferSize]; + byte[] bufferActual = new byte[bufferSize]; + + operation.apply(expectedInput, position, bufferExpected, bufferOffset, length); + operation.apply(actualInput, position, bufferActual, bufferOffset, length); + + assertEquals(Slices.wrappedBuffer(bufferExpected), Slices.wrappedBuffer(bufferActual)); + } + + private static void createTestFile(TrinoFileSystem fileSystem, Location location, int fileSize) + throws IOException + { + try (OutputStream output = fileSystem.newOutputFile(location).create()) { + byte[] bytes = new byte[4]; + Slice slice = Slices.wrappedBuffer(bytes); + for (int i = 0; i < fileSize; i++) { + slice.setInt(0, i); + output.write(bytes, 0, min(fileSize - i, 4)); + } + } + } + + 
private interface TestFileSystem + extends Closeable + { + TrinoFileSystem create() + throws IOException; + + Location tempLocation(); + } + + private static class TestMemoryFileSystem + implements TestFileSystem + { + @Override + public TrinoFileSystem create() + { + return new MemoryFileSystemFactory().create(ConnectorIdentity.ofUser("")); + } + + @Override + public Location tempLocation() + { + return Location.of("memory:///fuzz"); + } + + @Override + public void close() + { + } + } + + private static class TestAlluxioFileSystem + implements TestFileSystem + { + private Path tempDirectory; + + @Override + public TrinoFileSystem create() + throws IOException + { + tempDirectory = Files.createTempDirectory("test"); + Path cacheDirectory = Files.createDirectory(tempDirectory.resolve("cache")); + + AlluxioFileSystemCacheConfig configuration = new AlluxioFileSystemCacheConfig() + .setCacheDirectories(cacheDirectory.toAbsolutePath().toString()) + .setCachePageSize(DataSize.ofBytes(PAGE_SIZE)) + .disableTTL() + .setMaxCacheSizes(CACHE_SIZE + "B"); + AlluxioConfiguration alluxioConfiguration = AlluxioFileSystemCacheModule.getAlluxioConfiguration(configuration); + + MemoryFileSystemFactory fileSystemFactory = new MemoryFileSystemFactory(); + TestingAlluxioFileSystemCache alluxioCache = new TestingAlluxioFileSystemCache(alluxioConfiguration, new DefaultCacheKeyProvider()); + return new CacheFileSystem(fileSystemFactory.create(ConnectorIdentity.ofUser("hello")), + alluxioCache, alluxioCache.getCacheKeyProvider()); + } + + @Override + public Location tempLocation() + { + return Location.of("memory:///fuzz"); + } + + @Override + public void close() + { + tempDirectory.toFile().delete(); + } + } + + private interface TrinoInputOperation + { + void apply(T input, long position, byte[] buffer, int bufferOffset, int bufferLength) + throws IOException; + } + + private interface CreateTrinoInput + { + T apply(TrinoFileSystem fileSystem, Location location) + throws IOException; + } 
+} diff --git a/lib/trino-filesystem-cache-alluxio/src/test/java/io/trino/filesystem/alluxio/TestingAlluxioFileSystemCache.java b/lib/trino-filesystem-cache-alluxio/src/test/java/io/trino/filesystem/alluxio/TestingAlluxioFileSystemCache.java new file mode 100644 index 000000000000..4c0b451499d6 --- /dev/null +++ b/lib/trino-filesystem-cache-alluxio/src/test/java/io/trino/filesystem/alluxio/TestingAlluxioFileSystemCache.java @@ -0,0 +1,151 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package io.trino.filesystem.alluxio; + +import alluxio.conf.AlluxioConfiguration; +import com.google.common.collect.ImmutableMap; +import io.trino.filesystem.Location; +import io.trino.filesystem.TrinoInput; +import io.trino.filesystem.TrinoInputFile; +import io.trino.filesystem.TrinoInputStream; +import io.trino.filesystem.cache.CacheKeyProvider; +import io.trino.filesystem.cache.TrinoFileSystemCache; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicInteger; + +import static java.util.Objects.requireNonNull; + +public class TestingAlluxioFileSystemCache + implements TrinoFileSystemCache +{ + private final AlluxioFileSystemCache cache; + + @Override + public TrinoInput cacheInput(TrinoInputFile delegate, String key) + throws IOException + { + return cache.cacheInput(delegate, key); + } + + @Override + public TrinoInputStream cacheStream(TrinoInputFile delegate, String key) + throws IOException + { + return cache.cacheStream(delegate, key); + } + + @Override + public void expire(Location location) + throws IOException + { + } + + public enum OperationType + { + CACHE_READ, + EXTERNAL_READ, + } + + public record OperationContext(OperationType type, Location location) + { + public OperationContext + { + requireNonNull(type, "type is null"); + requireNonNull(location, "location is null"); + } + } + + private final TestingAlluxioCacheStats statistics; + private final CacheKeyProvider delegateKeyProvider; + private final TestingCacheKeyProvider keyProvider; + private final AtomicInteger cacheGeneration = new AtomicInteger(0); + + public TestingAlluxioFileSystemCache(AlluxioConfiguration alluxioConfiguration, CacheKeyProvider keyProvider) + { + statistics = new TestingAlluxioCacheStats(); + cache = new AlluxioFileSystemCache(AlluxioFileSystemCacheModule.getCacheManager(alluxioConfiguration), 
alluxioConfiguration, statistics); + delegateKeyProvider = requireNonNull(keyProvider, "keyProvider is null"); + this.keyProvider = new TestingCacheKeyProvider(); + } + + public CacheKeyProvider getCacheKeyProvider() + { + return this.keyProvider; + } + + private class TestingCacheKeyProvider + implements CacheKeyProvider + { + @Override + public Optional getCacheKey(TrinoInputFile delegate) + throws IOException + { + return delegateKeyProvider.getCacheKey(delegate).map(key -> key + cacheGeneration.get()); + } + } + + public void reset() + { + statistics.reset(); + } + + public void clear() + { + cacheGeneration.incrementAndGet(); + } + + public Map> getOperationCounts() + { + return statistics.getOperationCounts(); + } + + private static class TestingAlluxioCacheStats + implements CacheStats + { + private final Map> operationCounts = new ConcurrentHashMap<>(); + + public TestingAlluxioCacheStats() + { + super(); + } + + @Override + public void recordExternalRead(Location location, int length) + { + operationCounts.computeIfAbsent(new OperationContext(OperationType.EXTERNAL_READ, location), k -> new ArrayList<>()).add(length); + } + + @Override + public void recordCacheRead(Location location, int length) + { + operationCounts.computeIfAbsent(new OperationContext(OperationType.CACHE_READ, location), k -> new ArrayList<>()).add(length); + } + + public Map> getOperationCounts() + { + return ImmutableMap.copyOf(operationCounts); + } + + public void reset() + { + operationCounts.clear(); + } + } +} diff --git a/lib/trino-filesystem-manager/pom.xml b/lib/trino-filesystem-manager/pom.xml index ca2db21ef5b9..a7448ea843e3 100644 --- a/lib/trino-filesystem-manager/pom.xml +++ b/lib/trino-filesystem-manager/pom.xml @@ -52,6 +52,11 @@ trino-filesystem-azure + + io.trino + trino-filesystem-cache-alluxio + + io.trino trino-filesystem-gcs diff --git a/lib/trino-filesystem-manager/src/main/java/io/trino/filesystem/manager/FileSystemConfig.java 
b/lib/trino-filesystem-manager/src/main/java/io/trino/filesystem/manager/FileSystemConfig.java index 13390b5d420b..f8d2b47c1041 100644 --- a/lib/trino-filesystem-manager/src/main/java/io/trino/filesystem/manager/FileSystemConfig.java +++ b/lib/trino-filesystem-manager/src/main/java/io/trino/filesystem/manager/FileSystemConfig.java @@ -15,12 +15,15 @@ import io.airlift.configuration.Config; +import static io.trino.filesystem.manager.FileSystemConfig.CacheType.NONE; + public class FileSystemConfig { private boolean hadoopEnabled = true; private boolean nativeAzureEnabled; private boolean nativeS3Enabled; private boolean nativeGcsEnabled; + private CacheType cacheType = NONE; public boolean isHadoopEnabled() { @@ -69,4 +72,22 @@ public FileSystemConfig setNativeGcsEnabled(boolean nativeGcsEnabled) this.nativeGcsEnabled = nativeGcsEnabled; return this; } + + public CacheType getCacheType() + { + return cacheType; + } + + @Config("fs.cache") + public FileSystemConfig setCacheType(CacheType cacheType) + { + this.cacheType = cacheType; + return this; + } + + public enum CacheType + { + NONE, + ALLUXIO, + } } diff --git a/lib/trino-filesystem-manager/src/main/java/io/trino/filesystem/manager/FileSystemModule.java b/lib/trino-filesystem-manager/src/main/java/io/trino/filesystem/manager/FileSystemModule.java index 57cd749bb05e..6cc84e7e562c 100644 --- a/lib/trino-filesystem-manager/src/main/java/io/trino/filesystem/manager/FileSystemModule.java +++ b/lib/trino-filesystem-manager/src/main/java/io/trino/filesystem/manager/FileSystemModule.java @@ -15,14 +15,22 @@ import com.google.inject.Binder; import com.google.inject.Provides; +import com.google.inject.Scopes; import com.google.inject.Singleton; import io.airlift.bootstrap.LifeCycleManager; import io.airlift.configuration.AbstractConfigurationAwareModule; import io.opentelemetry.api.OpenTelemetry; import io.opentelemetry.api.trace.Tracer; import io.trino.filesystem.TrinoFileSystemFactory; +import 
io.trino.filesystem.alluxio.AlluxioFileSystemCacheModule; import io.trino.filesystem.azure.AzureFileSystemFactory; import io.trino.filesystem.azure.AzureFileSystemModule; +import io.trino.filesystem.cache.CacheFileSystemFactory; +import io.trino.filesystem.cache.CacheKeyProvider; +import io.trino.filesystem.cache.CachingHostAddressProvider; +import io.trino.filesystem.cache.DefaultCacheKeyProvider; +import io.trino.filesystem.cache.NoneCachingHostAddressProvider; +import io.trino.filesystem.cache.TrinoFileSystemCache; import io.trino.filesystem.gcs.GcsFileSystemFactory; import io.trino.filesystem.gcs.GcsFileSystemModule; import io.trino.filesystem.s3.S3FileSystemFactory; @@ -35,6 +43,7 @@ import static com.google.inject.multibindings.MapBinder.newMapBinder; import static com.google.inject.multibindings.OptionalBinder.newOptionalBinder; +import static io.airlift.configuration.ConditionalModule.conditionalModule; import static java.util.Objects.requireNonNull; public class FileSystemModule @@ -91,6 +100,17 @@ protected void setup(Binder binder) install(new GcsFileSystemModule()); factories.addBinding("gs").to(GcsFileSystemFactory.class); } + + newOptionalBinder(binder, CachingHostAddressProvider.class).setDefault().to(NoneCachingHostAddressProvider.class).in(Scopes.SINGLETON); + newOptionalBinder(binder, CacheKeyProvider.class).setDefault().to(DefaultCacheKeyProvider.class).in(Scopes.SINGLETON); + newMapBinder(binder, FileSystemConfig.CacheType.class, TrinoFileSystemCache.class); + + newOptionalBinder(binder, TrinoFileSystemCache.class); + + install(conditionalModule( + FileSystemConfig.class, + cache -> cache.getCacheType() == FileSystemConfig.CacheType.ALLUXIO, + new AlluxioFileSystemCacheModule())); } @Provides @@ -99,12 +119,17 @@ public TrinoFileSystemFactory createFileSystemFactory( Optional hdfsFileSystemLoader, LifeCycleManager lifeCycleManager, Map factories, + Optional fileSystemCache, + Optional keyProvider, Tracer tracer) { Optional hdfsFactory = 
hdfsFileSystemLoader.map(HdfsFileSystemLoader::create); hdfsFactory.ifPresent(lifeCycleManager::addInstance); TrinoFileSystemFactory delegate = new SwitchingFileSystemFactory(hdfsFactory, factories); + if (fileSystemCache.isPresent()) { + delegate = new CacheFileSystemFactory(delegate, fileSystemCache.orElseThrow(), keyProvider.orElseThrow()); + } return new TracingFileSystemFactory(tracer, delegate); } } diff --git a/lib/trino-filesystem-manager/src/test/java/io/trino/filesystem/manager/TestFileSystemConfig.java b/lib/trino-filesystem-manager/src/test/java/io/trino/filesystem/manager/TestFileSystemConfig.java index 4e765b70ccee..2b4be5d5e135 100644 --- a/lib/trino-filesystem-manager/src/test/java/io/trino/filesystem/manager/TestFileSystemConfig.java +++ b/lib/trino-filesystem-manager/src/test/java/io/trino/filesystem/manager/TestFileSystemConfig.java @@ -21,6 +21,8 @@ import static io.airlift.configuration.testing.ConfigAssertions.assertFullMapping; import static io.airlift.configuration.testing.ConfigAssertions.assertRecordedDefaults; import static io.airlift.configuration.testing.ConfigAssertions.recordDefaults; +import static io.trino.filesystem.manager.FileSystemConfig.CacheType.ALLUXIO; +import static io.trino.filesystem.manager.FileSystemConfig.CacheType.NONE; public class TestFileSystemConfig { @@ -31,7 +33,8 @@ public void testDefaults() .setHadoopEnabled(true) .setNativeAzureEnabled(false) .setNativeS3Enabled(false) - .setNativeGcsEnabled(false)); + .setNativeGcsEnabled(false) + .setCacheType(NONE)); } @Test @@ -42,13 +45,15 @@ public void testExplicitPropertyMappings() .put("fs.native-azure.enabled", "true") .put("fs.native-s3.enabled", "true") .put("fs.native-gcs.enabled", "true") + .put("fs.cache", "alluxio") .buildOrThrow(); FileSystemConfig expected = new FileSystemConfig() .setHadoopEnabled(false) .setNativeAzureEnabled(true) .setNativeS3Enabled(true) - .setNativeGcsEnabled(true); + .setNativeGcsEnabled(true) + .setCacheType(ALLUXIO); 
assertFullMapping(properties, expected); } diff --git a/lib/trino-filesystem/pom.xml b/lib/trino-filesystem/pom.xml index f97cb1ca0e88..23adc2655f9a 100644 --- a/lib/trino-filesystem/pom.xml +++ b/lib/trino-filesystem/pom.xml @@ -17,11 +17,37 @@ + + com.github.ishugaliy + allgood-consistent-hash + 1.0.0 + + com.google.guava guava + + com.google.inject + guice + + + + io.airlift + concurrent + + + + io.airlift + configuration + + + + io.airlift + log + + io.airlift slice @@ -52,6 +78,11 @@ trino-spi + + jakarta.annotation + jakarta.annotation-api + + org.jetbrains annotations @@ -60,19 +91,25 @@ io.airlift - concurrent + junit-extensions test io.airlift - junit-extensions + testing test - io.airlift - testing + io.trino + trino-client + test + + + + io.trino + trino-main test @@ -83,6 +120,12 @@ test + + io.trino + trino-testing + test + + org.assertj assertj-core diff --git a/lib/trino-filesystem/src/main/java/io/trino/filesystem/cache/CacheFileSystem.java b/lib/trino-filesystem/src/main/java/io/trino/filesystem/cache/CacheFileSystem.java new file mode 100644 index 000000000000..6449db7a0199 --- /dev/null +++ b/lib/trino-filesystem/src/main/java/io/trino/filesystem/cache/CacheFileSystem.java @@ -0,0 +1,133 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package io.trino.filesystem.cache; + +import io.trino.filesystem.FileIterator; +import io.trino.filesystem.Location; +import io.trino.filesystem.TrinoFileSystem; +import io.trino.filesystem.TrinoInputFile; +import io.trino.filesystem.TrinoOutputFile; + +import java.io.IOException; +import java.io.UncheckedIOException; +import java.util.Optional; +import java.util.Set; + +import static java.util.Objects.requireNonNull; + +public final class CacheFileSystem + implements TrinoFileSystem +{ + private final TrinoFileSystem delegate; + private final TrinoFileSystemCache cache; + private final CacheKeyProvider keyProvider; + + public CacheFileSystem(TrinoFileSystem delegate, TrinoFileSystemCache cache, CacheKeyProvider keyProvider) + { + this.delegate = requireNonNull(delegate, "delegate is null"); + this.cache = requireNonNull(cache, "cache is null"); + this.keyProvider = requireNonNull(keyProvider, "keyProvider is null"); + } + + @Override + public TrinoInputFile newInputFile(Location location) + { + return new CacheInputFile(delegate.newInputFile(location), cache, keyProvider); + } + + @Override + public TrinoInputFile newInputFile(Location location, long length) + { + return new CacheInputFile(delegate.newInputFile(location, length), cache, keyProvider); + } + + @Override + public TrinoOutputFile newOutputFile(Location location) + { + TrinoOutputFile output = delegate.newOutputFile(location); + try { + cache.expire(location); + } + catch (IOException e) { + throw new UncheckedIOException(e); + } + return output; + } + + @Override + public void deleteFile(Location location) + throws IOException + { + delegate.deleteFile(location); + cache.expire(location); + } + + @Override + public void deleteDirectory(Location location) + throws IOException + { + delegate.deleteDirectory(location); + } + + @Override + public void renameFile(Location source, Location target) + throws IOException + { + delegate.renameFile(source, target); + cache.expire(source); + 
cache.expire(target); + } + + @Override + public FileIterator listFiles(Location location) + throws IOException + { + return delegate.listFiles(location); + } + + @Override + public Optional directoryExists(Location location) + throws IOException + { + return delegate.directoryExists(location); + } + + @Override + public void createDirectory(Location location) + throws IOException + { + delegate.createDirectory(location); + } + + @Override + public void renameDirectory(Location source, Location target) + throws IOException + { + delegate.renameDirectory(source, target); + } + + @Override + public Set listDirectories(Location location) + throws IOException + { + return delegate.listDirectories(location); + } + + @Override + public Optional createTemporaryDirectory(Location targetPath, String temporaryPrefix, String relativePrefix) + throws IOException + { + return delegate.createTemporaryDirectory(targetPath, temporaryPrefix, relativePrefix); + } +} diff --git a/lib/trino-filesystem/src/main/java/io/trino/filesystem/cache/CacheFileSystemFactory.java b/lib/trino-filesystem/src/main/java/io/trino/filesystem/cache/CacheFileSystemFactory.java new file mode 100644 index 000000000000..c6197c43c39e --- /dev/null +++ b/lib/trino-filesystem/src/main/java/io/trino/filesystem/cache/CacheFileSystemFactory.java @@ -0,0 +1,41 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package io.trino.filesystem.cache; + +import io.trino.filesystem.TrinoFileSystem; +import io.trino.filesystem.TrinoFileSystemFactory; +import io.trino.spi.security.ConnectorIdentity; + +import static java.util.Objects.requireNonNull; + +public final class CacheFileSystemFactory + implements TrinoFileSystemFactory +{ + private final TrinoFileSystemFactory delegate; + private final TrinoFileSystemCache cache; + private final CacheKeyProvider keyProvider; + + public CacheFileSystemFactory(TrinoFileSystemFactory delegate, TrinoFileSystemCache cache, CacheKeyProvider keyProvider) + { + this.delegate = requireNonNull(delegate, "delegate is null"); + this.cache = requireNonNull(cache, "cache is null"); + this.keyProvider = requireNonNull(keyProvider, "keyProvider is null"); + } + + @Override + public TrinoFileSystem create(ConnectorIdentity identity) + { + return new CacheFileSystem(delegate.create(identity), cache, keyProvider); + } +} diff --git a/lib/trino-filesystem/src/main/java/io/trino/filesystem/cache/CacheInputFile.java b/lib/trino-filesystem/src/main/java/io/trino/filesystem/cache/CacheInputFile.java new file mode 100644 index 000000000000..4ba17e972364 --- /dev/null +++ b/lib/trino-filesystem/src/main/java/io/trino/filesystem/cache/CacheInputFile.java @@ -0,0 +1,89 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package io.trino.filesystem.cache; + +import io.trino.filesystem.Location; +import io.trino.filesystem.TrinoInput; +import io.trino.filesystem.TrinoInputFile; +import io.trino.filesystem.TrinoInputStream; + +import java.io.IOException; +import java.time.Instant; +import java.util.Optional; + +import static java.util.Objects.requireNonNull; + +public final class CacheInputFile + implements TrinoInputFile +{ + private final TrinoInputFile delegate; + private final TrinoFileSystemCache cache; + private final CacheKeyProvider keyProvider; + + public CacheInputFile(TrinoInputFile delegate, TrinoFileSystemCache cache, CacheKeyProvider keyProvider) + { + this.delegate = requireNonNull(delegate, "delegate is null"); + this.cache = requireNonNull(cache, "cache is null"); + this.keyProvider = requireNonNull(keyProvider, "keyProvider is null"); + } + + @Override + public TrinoInput newInput() + throws IOException + { + Optional key = keyProvider.getCacheKey(delegate); + if (key.isPresent()) { + return cache.cacheInput(delegate, key.orElseThrow()); + } + return delegate.newInput(); + } + + @Override + public TrinoInputStream newStream() + throws IOException + { + Optional key = keyProvider.getCacheKey(delegate); + if (key.isPresent()) { + return cache.cacheStream(delegate, key.orElseThrow()); + } + return delegate.newStream(); + } + + @Override + public long length() + throws IOException + { + return delegate.length(); + } + + @Override + public Instant lastModified() + throws IOException + { + return delegate.lastModified(); + } + + @Override + public boolean exists() + throws IOException + { + return delegate.exists(); + } + + @Override + public Location location() + { + return delegate.location(); + } +} diff --git a/lib/trino-filesystem/src/main/java/io/trino/filesystem/cache/CacheKeyProvider.java b/lib/trino-filesystem/src/main/java/io/trino/filesystem/cache/CacheKeyProvider.java new file mode 100644 index 000000000000..add88ae58acf --- /dev/null +++ 
b/lib/trino-filesystem/src/main/java/io/trino/filesystem/cache/CacheKeyProvider.java @@ -0,0 +1,28 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.filesystem.cache; + +import io.trino.filesystem.TrinoInputFile; + +import java.io.IOException; +import java.util.Optional; + +public interface CacheKeyProvider +{ + /** + * Get the cache key of a TrinoInputFile. Returns Optional.empty() if the file is not cacheable. + */ + Optional getCacheKey(TrinoInputFile delegate) + throws IOException; +} diff --git a/lib/trino-filesystem/src/main/java/io/trino/filesystem/cache/CachingHostAddressProvider.java b/lib/trino-filesystem/src/main/java/io/trino/filesystem/cache/CachingHostAddressProvider.java new file mode 100644 index 000000000000..18f01a2ba8ad --- /dev/null +++ b/lib/trino-filesystem/src/main/java/io/trino/filesystem/cache/CachingHostAddressProvider.java @@ -0,0 +1,26 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package io.trino.filesystem.cache; + +import io.trino.spi.HostAddress; + +import java.util.List; + +public interface CachingHostAddressProvider +{ + /** + * Returns a lists of hosts which are preferred to cache the split with the given path. + */ + List getHosts(String splitPath); +} diff --git a/lib/trino-filesystem/src/main/java/io/trino/filesystem/cache/ConsistentHashingHostAddressProvider.java b/lib/trino-filesystem/src/main/java/io/trino/filesystem/cache/ConsistentHashingHostAddressProvider.java new file mode 100644 index 000000000000..eeb0a9723db7 --- /dev/null +++ b/lib/trino-filesystem/src/main/java/io/trino/filesystem/cache/ConsistentHashingHostAddressProvider.java @@ -0,0 +1,126 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package io.trino.filesystem.cache; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Sets; +import com.google.inject.Inject; +import io.airlift.log.Logger; +import io.trino.spi.HostAddress; +import io.trino.spi.Node; +import io.trino.spi.NodeManager; +import jakarta.annotation.PostConstruct; +import jakarta.annotation.PreDestroy; +import org.ishugaliy.allgood.consistent.hash.ConsistentHash; +import org.ishugaliy.allgood.consistent.hash.HashRing; +import org.ishugaliy.allgood.consistent.hash.hasher.DefaultHasher; + +import java.util.Comparator; +import java.util.List; +import java.util.Set; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.TimeUnit; + +import static com.google.common.collect.ImmutableList.toImmutableList; +import static com.google.common.collect.ImmutableSet.toImmutableSet; +import static io.airlift.concurrent.Threads.daemonThreadsNamed; +import static java.util.Objects.requireNonNull; +import static java.util.concurrent.Executors.newSingleThreadScheduledExecutor; + +public class ConsistentHashingHostAddressProvider + implements CachingHostAddressProvider +{ + private static final Logger log = Logger.get(ConsistentHashingHostAddressProvider.class); + + private final NodeManager nodeManager; + private final ScheduledExecutorService hashRingUpdater = newSingleThreadScheduledExecutor(daemonThreadsNamed("hash-ring-refresher-%s")); + private final int replicationFactor; + private final Comparator hostAddressComparator = Comparator.comparing(HostAddress::getHostText).thenComparing(HostAddress::getPort); + + private final ConsistentHash consistentHashRing = HashRing.newBuilder() + .hasher(DefaultHasher.METRO_HASH) + .build(); + + @Inject + public ConsistentHashingHostAddressProvider(NodeManager nodeManager, ConsistentHashingHostAddressProviderConfiguration configuration) + { + this.nodeManager = requireNonNull(nodeManager, 
"nodeManager is null"); + this.replicationFactor = configuration.getPreferredHostsCount(); + } + + @Override + public List getHosts(String splitPath) + { + return consistentHashRing.locate(splitPath, replicationFactor) + .stream() + .map(TrinoNode::getHostAndPort) + .sorted(hostAddressComparator) + .collect(toImmutableList()); + } + + @PostConstruct + public void startRefreshingHashRing() + { + hashRingUpdater.scheduleWithFixedDelay(this::refreshHashRing, 5, 5, TimeUnit.SECONDS); + refreshHashRing(); + } + + @PreDestroy + public void destroy() + { + hashRingUpdater.shutdownNow(); + } + + @VisibleForTesting + synchronized void refreshHashRing() + { + try { + ImmutableSet trinoNodes = nodeManager.getWorkerNodes().stream().map(TrinoNode::of).collect(toImmutableSet()); + Set hashRingNodes = consistentHashRing.getNodes(); + Set removedNodes = Sets.difference(hashRingNodes, trinoNodes); + Set newNodes = Sets.difference(trinoNodes, hashRingNodes); + // Avoid acquiring a write lock in consistentHashRing if possible + if (!newNodes.isEmpty()) { + consistentHashRing.addAll(newNodes); + } + if (!removedNodes.isEmpty()) { + removedNodes.forEach(consistentHashRing::remove); + } + } + catch (Exception e) { + log.error(e, "Error refreshing hash ring"); + } + } + + private record TrinoNode(String nodeIdentifier, HostAddress hostAndPort) + implements org.ishugaliy.allgood.consistent.hash.node.Node + { + public static TrinoNode of(Node node) + { + return new TrinoNode(node.getNodeIdentifier(), node.getHostAndPort()); + } + + public HostAddress getHostAndPort() + { + return hostAndPort; + } + + @Override + public String getKey() + { + return nodeIdentifier; + } + } +} diff --git a/lib/trino-filesystem/src/main/java/io/trino/filesystem/cache/ConsistentHashingHostAddressProviderConfiguration.java b/lib/trino-filesystem/src/main/java/io/trino/filesystem/cache/ConsistentHashingHostAddressProviderConfiguration.java new file mode 100644 index 000000000000..7c39c7856adf --- /dev/null +++ 
b/lib/trino-filesystem/src/main/java/io/trino/filesystem/cache/ConsistentHashingHostAddressProviderConfiguration.java @@ -0,0 +1,35 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.filesystem.cache; + +import io.airlift.configuration.Config; +import io.airlift.configuration.ConfigDescription; + +public class ConsistentHashingHostAddressProviderConfiguration +{ + private int preferredHostsCount = 2; + + @Config("fs.cache.preferred-hosts-count") + @ConfigDescription("The number of preferred nodes for caching a file. Defaults to 2.") + public ConsistentHashingHostAddressProviderConfiguration setPreferredHostsCount(int preferredHostsCount) + { + this.preferredHostsCount = preferredHostsCount; + return this; + } + + public int getPreferredHostsCount() + { + return this.preferredHostsCount; + } +} diff --git a/lib/trino-filesystem/src/main/java/io/trino/filesystem/cache/DefaultCacheKeyProvider.java b/lib/trino-filesystem/src/main/java/io/trino/filesystem/cache/DefaultCacheKeyProvider.java new file mode 100644 index 000000000000..a7f43725d25c --- /dev/null +++ b/lib/trino-filesystem/src/main/java/io/trino/filesystem/cache/DefaultCacheKeyProvider.java @@ -0,0 +1,30 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.filesystem.cache; + +import io.trino.filesystem.TrinoInputFile; + +import java.io.IOException; +import java.util.Optional; + +public final class DefaultCacheKeyProvider + implements CacheKeyProvider +{ + @Override + public Optional getCacheKey(TrinoInputFile delegate) + throws IOException + { + return Optional.of(delegate.location().path() + delegate.lastModified()); + } +} diff --git a/lib/trino-filesystem/src/main/java/io/trino/filesystem/cache/NoneCachingHostAddressProvider.java b/lib/trino-filesystem/src/main/java/io/trino/filesystem/cache/NoneCachingHostAddressProvider.java new file mode 100644 index 000000000000..349fc5b7694b --- /dev/null +++ b/lib/trino-filesystem/src/main/java/io/trino/filesystem/cache/NoneCachingHostAddressProvider.java @@ -0,0 +1,29 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package io.trino.filesystem.cache; + +import com.google.common.collect.ImmutableList; +import io.trino.spi.HostAddress; + +import java.util.List; + +public class NoneCachingHostAddressProvider + implements CachingHostAddressProvider +{ + @Override + public List getHosts(String splitPath) + { + return ImmutableList.of(); + } +} diff --git a/lib/trino-filesystem/src/main/java/io/trino/filesystem/cache/TrinoFileSystemCache.java b/lib/trino-filesystem/src/main/java/io/trino/filesystem/cache/TrinoFileSystemCache.java new file mode 100644 index 000000000000..ba2d49eccdbb --- /dev/null +++ b/lib/trino-filesystem/src/main/java/io/trino/filesystem/cache/TrinoFileSystemCache.java @@ -0,0 +1,42 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.filesystem.cache; + +import io.trino.filesystem.Location; +import io.trino.filesystem.TrinoInput; +import io.trino.filesystem.TrinoInputFile; +import io.trino.filesystem.TrinoInputStream; + +import java.io.IOException; + +public interface TrinoFileSystemCache +{ + /** + * Get the TrinoInput of the TrinoInputFile, potentially using or updating the data cached at key. + */ + TrinoInput cacheInput(TrinoInputFile delegate, String key) + throws IOException; + + /** + * Get the TrinoInputStream of the TrinoInputFile, potentially using or updating the data cached at key. 
+ */ + TrinoInputStream cacheStream(TrinoInputFile delegate, String key) + throws IOException; + + /** + * Give a hint to the cache that the cache entry for location should be expired. + */ + void expire(Location location) + throws IOException; +} diff --git a/lib/trino-filesystem/src/main/java/io/trino/filesystem/memory/MemoryFileSystem.java b/lib/trino-filesystem/src/main/java/io/trino/filesystem/memory/MemoryFileSystem.java index 33f5135ce32b..0a9bc1b74fe9 100644 --- a/lib/trino-filesystem/src/main/java/io/trino/filesystem/memory/MemoryFileSystem.java +++ b/lib/trino-filesystem/src/main/java/io/trino/filesystem/memory/MemoryFileSystem.java @@ -13,6 +13,7 @@ */ package io.trino.filesystem.memory; +import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableSet; import io.airlift.slice.Slice; import io.trino.filesystem.FileEntry; @@ -42,7 +43,8 @@ public class MemoryFileSystem { private final ConcurrentMap blobs = new ConcurrentHashMap<>(); - boolean isEmpty() + @VisibleForTesting + public boolean isEmpty() { return blobs.isEmpty(); } diff --git a/lib/trino-filesystem/src/test/java/io/trino/filesystem/AbstractTestTrinoFileSystem.java b/lib/trino-filesystem/src/test/java/io/trino/filesystem/AbstractTestTrinoFileSystem.java index ee5a7c03b8b7..2aad96f3f157 100644 --- a/lib/trino-filesystem/src/test/java/io/trino/filesystem/AbstractTestTrinoFileSystem.java +++ b/lib/trino-filesystem/src/test/java/io/trino/filesystem/AbstractTestTrinoFileSystem.java @@ -102,6 +102,11 @@ protected boolean seekPastEndOfFileFails() return true; } + protected boolean isFileContentCaching() + { + return false; + } + protected Location createLocation(String path) { if (path.isEmpty()) { @@ -1011,14 +1016,17 @@ public void testFileWithTrailingWhitespace() try (OutputStream outputStream = getFileSystem().newOutputFile(location).createOrOverwrite()) { outputStream.write(newContents.clone()); } - try (TrinoInputStream inputStream = inputFile.newStream()) { 
+ // Open a new input file with an updated file length. If we read with the old inputFile the cached (wrong) file length would be used. + // This can break some file system read operations (e.g., TrinoInput.readTail for most filesystems, newStream for caching file systems). + TrinoInputFile newInputFile = getFileSystem().newInputFile(location); + try (TrinoInputStream inputStream = newInputFile.newStream()) { byte[] bytes = ByteStreams.toByteArray(inputStream); assertThat(bytes).isEqualTo(newContents); } // Verify deleting getFileSystem().deleteFile(location); - assertThat(inputFile.exists()).as("exists after delete").isFalse(); + assertThat(newInputFile.exists()).as("exists after delete").isFalse(); // Verify renames if (supportsRenameFile()) { diff --git a/lib/trino-filesystem/src/test/java/io/trino/filesystem/cache/TestCacheFileSystem.java b/lib/trino-filesystem/src/test/java/io/trino/filesystem/cache/TestCacheFileSystem.java new file mode 100644 index 000000000000..4fd6c095bfa4 --- /dev/null +++ b/lib/trino-filesystem/src/test/java/io/trino/filesystem/cache/TestCacheFileSystem.java @@ -0,0 +1,74 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package io.trino.filesystem.cache; + +import io.trino.filesystem.AbstractTestTrinoFileSystem; +import io.trino.filesystem.Location; +import io.trino.filesystem.TrinoFileSystem; +import io.trino.filesystem.memory.MemoryFileSystem; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; + +import static org.assertj.core.api.Assertions.assertThat; + +public class TestCacheFileSystem + extends AbstractTestTrinoFileSystem +{ + private MemoryFileSystem delegate; + private CacheFileSystem fileSystem; + + @BeforeAll + void setUp() + { + delegate = new MemoryFileSystem(); + fileSystem = new CacheFileSystem(delegate, new TestingMemoryFileSystemCache(), new DefaultCacheKeyProvider()); + } + + @AfterAll + void tearDown() + { + delegate = null; + fileSystem = null; + } + + @Override + protected boolean isHierarchical() + { + return false; + } + + @Override + protected boolean supportsCreateExclusive() + { + return true; + } + + @Override + protected TrinoFileSystem getFileSystem() + { + return fileSystem; + } + + @Override + protected Location getRootLocation() + { + return Location.of("memory://"); + } + + @Override + protected void verifyFileSystemIsEmpty() + { + assertThat(delegate.isEmpty()).isTrue(); + } +} diff --git a/lib/trino-filesystem/src/test/java/io/trino/filesystem/cache/TestCacheFileSystemAccessOperations.java b/lib/trino-filesystem/src/test/java/io/trino/filesystem/cache/TestCacheFileSystemAccessOperations.java new file mode 100644 index 000000000000..e84243ac97e3 --- /dev/null +++ b/lib/trino-filesystem/src/test/java/io/trino/filesystem/cache/TestCacheFileSystemAccessOperations.java @@ -0,0 +1,122 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.filesystem.cache; + +import com.google.common.collect.HashMultiset; +import com.google.common.collect.ImmutableMultiset; +import com.google.common.collect.Multiset; +import io.airlift.slice.Slices; +import io.trino.filesystem.Location; +import io.trino.filesystem.TrackingFileSystemFactory; +import io.trino.filesystem.TrackingFileSystemFactory.OperationType; +import io.trino.filesystem.TrinoInput; +import io.trino.filesystem.TrinoInputFile; +import io.trino.filesystem.memory.MemoryFileSystemFactory; +import io.trino.spi.block.TestingSession; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestInstance; + +import java.io.IOException; +import java.io.OutputStream; +import java.nio.charset.StandardCharsets; + +import static io.trino.testing.MultisetAssertions.assertMultisetsEqual; +import static java.util.Collections.nCopies; +import static java.util.stream.Collectors.toCollection; +import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.TestInstance.Lifecycle; + +@TestInstance(Lifecycle.PER_CLASS) +public class TestCacheFileSystemAccessOperations +{ + private TrackingFileSystemFactory trackingFileSystemFactory; + private CacheFileSystem fileSystem; + + @BeforeAll + void setUp() + { + trackingFileSystemFactory = new TrackingFileSystemFactory(new MemoryFileSystemFactory()); + fileSystem = new CacheFileSystem(trackingFileSystemFactory.create(TestingSession.SESSION), new 
TestingMemoryFileSystemCache(), new DefaultCacheKeyProvider()); + } + + @AfterAll + void tearDown() + { + trackingFileSystemFactory = null; + fileSystem = null; + } + + @Test + void testCache() + throws IOException + { + Location location = getRootLocation().appendPath("hello"); + byte[] content = "hello world".getBytes(StandardCharsets.UTF_8); + try (OutputStream output = fileSystem.newOutputFile(location).create()) { + output.write(content); + } + + assertReadOperations(location, content, + ImmutableMultiset.builder() + .add(new FileOperation(location, OperationType.INPUT_FILE_NEW_STREAM)) + .add(new FileOperation(location, OperationType.INPUT_FILE_LAST_MODIFIED)) + .build()); + assertReadOperations(location, content, + ImmutableMultiset.builder() + .add(new FileOperation(location, OperationType.INPUT_FILE_LAST_MODIFIED)) + .build()); + + byte[] modifiedContent = "modified content".getBytes(StandardCharsets.UTF_8); + try (OutputStream output = fileSystem.newOutputFile(location).createOrOverwrite()) { + output.write(modifiedContent); + } + + assertReadOperations(location, modifiedContent, + ImmutableMultiset.builder() + .add(new FileOperation(location, OperationType.INPUT_FILE_NEW_STREAM)) + .add(new FileOperation(location, OperationType.INPUT_FILE_LAST_MODIFIED)) + .build()); + } + + private Location getRootLocation() + { + return Location.of("memory://"); + } + + private void assertReadOperations(Location location, byte[] content, Multiset fileOperations) + throws IOException + { + TrinoInputFile file = fileSystem.newInputFile(location); + int length = (int) file.length(); + trackingFileSystemFactory.reset(); + try (TrinoInput input = file.newInput()) { + assertThat(input.readFully(0, length)).isEqualTo(Slices.wrappedBuffer(content)); + } + assertMultisetsEqual(fileOperations, getOperations()); + } + + private Multiset getOperations() + { + return trackingFileSystemFactory.getOperationCounts() + .entrySet().stream() + .flatMap(entry -> nCopies(entry.getValue(), 
new FileOperation( + entry.getKey().location(), + entry.getKey().operationType())).stream()) + .collect(toCollection(HashMultiset::create)); + } + + private record FileOperation(Location path, OperationType operationType) {} +} diff --git a/lib/trino-filesystem/src/test/java/io/trino/filesystem/cache/TestConsistentHashingCacheHostAddressProvider.java b/lib/trino-filesystem/src/test/java/io/trino/filesystem/cache/TestConsistentHashingCacheHostAddressProvider.java new file mode 100644 index 000000000000..3c8ef89d6478 --- /dev/null +++ b/lib/trino-filesystem/src/test/java/io/trino/filesystem/cache/TestConsistentHashingCacheHostAddressProvider.java @@ -0,0 +1,118 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package io.trino.filesystem.cache; + +import com.google.common.collect.Sets; +import io.trino.client.NodeVersion; +import io.trino.metadata.InternalNode; +import io.trino.spi.Node; +import io.trino.testing.TestingNodeManager; +import org.junit.jupiter.api.Test; + +import java.net.URI; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; + +import static java.lang.Math.abs; +import static org.assertj.core.api.Assertions.assertThat; + +public class TestConsistentHashingCacheHostAddressProvider +{ + @Test + public void testConsistentHashing() + { + TestingNodeManager nodeManager = new TestingNodeManager(true); + nodeManager.addNode(node("test-1")); + nodeManager.addNode(node("test-2")); + nodeManager.addNode(node("test-3")); + ConsistentHashingHostAddressProvider provider = new ConsistentHashingHostAddressProvider( + nodeManager, + new ConsistentHashingHostAddressProviderConfiguration().setPreferredHostsCount(1)); + provider.refreshHashRing(); + assertFairDistribution(provider, nodeManager.getWorkerNodes()); + nodeManager.removeNode(node("test-2")); + provider.refreshHashRing(); + assertFairDistribution(provider, nodeManager.getWorkerNodes()); + nodeManager.addNode(node("test-4")); + nodeManager.addNode(node("test-5")); + provider.refreshHashRing(); + assertFairDistribution(provider, nodeManager.getWorkerNodes()); + } + + @Test + public void testConsistentHashingFairRedistribution() + { + TestingNodeManager nodeManager = new TestingNodeManager(true); + nodeManager.addNode(node("test-1")); + nodeManager.addNode(node("test-2")); + nodeManager.addNode(node("test-3")); + ConsistentHashingHostAddressProvider provider = new ConsistentHashingHostAddressProvider( + nodeManager, + new ConsistentHashingHostAddressProviderConfiguration().setPreferredHostsCount(1)); + provider.refreshHashRing(); + Map> distribution = getDistribution(provider); + nodeManager.removeNode(node("test-1")); + 
provider.refreshHashRing(); + Map> removeOne = getDistribution(provider); + assertMinimalRedistribution(distribution, removeOne); + nodeManager.addNode(node("test-1")); + provider.refreshHashRing(); + Map> addOne = getDistribution(provider); + assertMinimalRedistribution(removeOne, addOne); + assertThat(addOne).isEqualTo(distribution); + nodeManager.addNode(node("test-4")); + provider.refreshHashRing(); + Map> addTwo = getDistribution(provider); + assertMinimalRedistribution(addOne, addTwo); + } + + private static void assertFairDistribution(CachingHostAddressProvider cachingHostAddressProvider, Set nodeNames) + { + int n = 1000; + Map counts = new HashMap<>(); + for (int i = 0; i < n; i++) { + counts.merge(cachingHostAddressProvider.getHosts(String.valueOf(i)).get(0).getHostText(), 1, Math::addExact); + } + assertThat(nodeNames.stream().map(m -> m.getHostAndPort().getHostText()).collect(Collectors.toSet())).isEqualTo(counts.keySet()); + counts.values().forEach(c -> assertThat(abs(c - n / nodeNames.size()) < 0.1 * n).isTrue()); + } + + private void assertMinimalRedistribution(Map> oldDistribution, Map> newDistribution) + { + oldDistribution.entrySet().stream().filter(e -> newDistribution.containsKey(e.getKey())).forEach(entry -> { + int sameKeySize = Sets.intersection(newDistribution.get(entry.getKey()), entry.getValue()).size(); + int oldKeySize = entry.getValue().size(); + assertThat(abs(sameKeySize - oldKeySize) < oldKeySize / oldDistribution.size()).isTrue(); + }); + } + + private Map> getDistribution(ConsistentHashingHostAddressProvider provider) + { + int n = 1000; + Map> distribution = new HashMap<>(); + for (int i = 0; i < n; i++) { + String host = provider.getHosts(String.valueOf(i)).get(0).getHostText(); + distribution.computeIfAbsent(host, (k) -> new HashSet<>()).add(i); + } + return distribution; + } + + private static Node node(String nodeName) + { + return new InternalNode(nodeName, URI.create("http://" + nodeName + "/"), NodeVersion.UNKNOWN, false); 
+ } +} diff --git a/lib/trino-filesystem/src/test/java/io/trino/filesystem/cache/TestingMemoryFileSystemCache.java b/lib/trino-filesystem/src/test/java/io/trino/filesystem/cache/TestingMemoryFileSystemCache.java new file mode 100644 index 000000000000..52725fbc85e2 --- /dev/null +++ b/lib/trino-filesystem/src/test/java/io/trino/filesystem/cache/TestingMemoryFileSystemCache.java @@ -0,0 +1,70 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.filesystem.cache; + +import io.trino.filesystem.Location; +import io.trino.filesystem.TrinoInput; +import io.trino.filesystem.TrinoInputFile; +import io.trino.filesystem.TrinoInputStream; +import io.trino.filesystem.memory.MemoryFileSystem; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.concurrent.atomic.AtomicInteger; + +public class TestingMemoryFileSystemCache + implements TrinoFileSystemCache +{ + private final MemoryFileSystem memoryCache = new MemoryFileSystem(); + private final AtomicInteger cacheGeneration = new AtomicInteger(0); + + @Override + public TrinoInput cacheInput(TrinoInputFile delegate, String key) + throws IOException + { + Location cacheLocation = Location.of("memory:///" + key.replace("memory:///", "") + key.hashCode() + cacheGeneration.get()); + TrinoInputFile cacheEntry = memoryCache.newInputFile(cacheLocation); + if (!cacheEntry.exists()) { + try (OutputStream output = 
memoryCache.newOutputFile(cacheLocation).create(); + InputStream input = delegate.newStream()) { + input.transferTo(output); + } + } + return cacheEntry.newInput(); + } + + @Override + public TrinoInputStream cacheStream(TrinoInputFile delegate, String key) + throws IOException + { + Location cacheLocation = Location.of("memory:///" + key.replace("memory:///", "") + key.hashCode() + cacheGeneration.get()); + TrinoInputFile cacheEntry = memoryCache.newInputFile(cacheLocation); + if (!cacheEntry.exists()) { + try (OutputStream output = memoryCache.newOutputFile(cacheLocation).create(); + InputStream input = delegate.newStream()) { + input.transferTo(output); + } + } + return cacheEntry.newStream(); + } + + @Override + public void expire(Location location) + throws IOException + { + // Expire the entire cache on a single invalidation + cacheGeneration.incrementAndGet(); + } +} diff --git a/plugin/trino-delta-lake/pom.xml b/plugin/trino-delta-lake/pom.xml index eb97342e88e3..223804703095 100644 --- a/plugin/trino-delta-lake/pom.xml +++ b/plugin/trino-delta-lake/pom.xml @@ -249,12 +249,24 @@ runtime + + io.trino + trino-filesystem-cache-alluxio + runtime + + io.trino trino-memory-context runtime + + org.alluxio + alluxio-core-common + runtime + + org.jetbrains annotations @@ -279,6 +291,12 @@ test + + io.trino + trino-client + test + + io.trino trino-exchange-filesystem @@ -299,10 +317,23 @@ test + + io.trino + trino-filesystem-cache-alluxio + test-jar + test + + io.trino trino-hdfs test + + + com.qubole.rubix + rubix-presto-shaded + + diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeModule.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeModule.java index 2801e8067b85..05955dc1c7e7 100644 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeModule.java +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeModule.java @@ -21,9 +21,11 @@ import 
com.google.inject.Singleton; import com.google.inject.multibindings.Multibinder; import io.airlift.configuration.AbstractConfigurationAwareModule; +import io.trino.filesystem.cache.CacheKeyProvider; import io.trino.plugin.base.CatalogName; import io.trino.plugin.base.security.ConnectorAccessControlModule; import io.trino.plugin.base.session.SessionPropertiesProvider; +import io.trino.plugin.deltalake.cache.DeltaLakeCacheKeyProvider; import io.trino.plugin.deltalake.functions.tablechanges.TableChangesFunctionProvider; import io.trino.plugin.deltalake.functions.tablechanges.TableChangesProcessorProvider; import io.trino.plugin.deltalake.procedure.DropExtendedStatsProcedure; @@ -148,6 +150,8 @@ public void setup(Binder binder) newSetBinder(binder, ConnectorTableFunction.class).addBinding().toProvider(TableChangesFunctionProvider.class).in(Scopes.SINGLETON); binder.bind(FunctionProvider.class).to(DeltaLakeFunctionProvider.class).in(Scopes.SINGLETON); binder.bind(TableChangesProcessorProvider.class).in(Scopes.SINGLETON); + + newOptionalBinder(binder, CacheKeyProvider.class).setBinding().to(DeltaLakeCacheKeyProvider.class).in(Scopes.SINGLETON); } @Singleton diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeSplit.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeSplit.java index 6ae04c0907f4..fe857d748064 100644 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeSplit.java +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeSplit.java @@ -14,14 +14,18 @@ package io.trino.plugin.deltalake; import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import io.airlift.slice.SizeOf; import io.trino.plugin.deltalake.transactionlog.DeletionVectorEntry; +import 
io.trino.spi.HostAddress; import io.trino.spi.SplitWeight; import io.trino.spi.connector.ConnectorSplit; import io.trino.spi.predicate.TupleDomain; +import java.util.List; import java.util.Map; import java.util.Objects; import java.util.Optional; @@ -45,6 +49,7 @@ public class DeltaLakeSplit private final Optional fileRowCount; private final long fileModifiedTime; private final Optional deletionVector; + private final List addresses; private final SplitWeight splitWeight; private final TupleDomain statisticsPredicate; private final Map> partitionKeys; @@ -61,6 +66,33 @@ public DeltaLakeSplit( @JsonProperty("splitWeight") SplitWeight splitWeight, @JsonProperty("statisticsPredicate") TupleDomain statisticsPredicate, @JsonProperty("partitionKeys") Map> partitionKeys) + { + this( + path, + start, + length, + fileSize, + fileRowCount, + fileModifiedTime, + deletionVector, + ImmutableList.of(), + splitWeight, + statisticsPredicate, + partitionKeys); + } + + public DeltaLakeSplit( + String path, + long start, + long length, + long fileSize, + Optional fileRowCount, + long fileModifiedTime, + Optional deletionVector, + List addresses, + SplitWeight splitWeight, + TupleDomain statisticsPredicate, + Map> partitionKeys) { this.path = requireNonNull(path, "path is null"); this.start = start; @@ -69,11 +101,20 @@ public DeltaLakeSplit( this.fileRowCount = requireNonNull(fileRowCount, "rowCount is null"); this.fileModifiedTime = fileModifiedTime; this.deletionVector = requireNonNull(deletionVector, "deletionVector is null"); + this.addresses = requireNonNull(addresses, "addresses is null"); this.splitWeight = requireNonNull(splitWeight, "splitWeight is null"); this.statisticsPredicate = requireNonNull(statisticsPredicate, "statisticsPredicate is null"); this.partitionKeys = requireNonNull(partitionKeys, "partitionKeys is null"); } + // do not serialize addresses as they are not needed on workers + @JsonIgnore + @Override + public List getAddresses() + { + return addresses; + } + 
@JsonProperty @Override public SplitWeight getSplitWeight() @@ -157,6 +198,7 @@ public Object getInfo() .put("path", path) .put("start", start) .put("length", length) + .put("addresses", addresses) .buildOrThrow(); } @@ -171,6 +213,7 @@ public String toString() .add("rowCount", fileRowCount) .add("fileModifiedTime", fileModifiedTime) .add("deletionVector", deletionVector) + .add("addresses", addresses) .add("statisticsPredicate", statisticsPredicate) .add("partitionKeys", partitionKeys) .toString(); @@ -193,6 +236,7 @@ public boolean equals(Object o) path.equals(that.path) && fileRowCount.equals(that.fileRowCount) && deletionVector.equals(that.deletionVector) && + Objects.equals(addresses, that.addresses) && Objects.equals(statisticsPredicate, that.statisticsPredicate) && Objects.equals(partitionKeys, that.partitionKeys); } @@ -200,6 +244,6 @@ public boolean equals(Object o) @Override public int hashCode() { - return Objects.hash(path, start, length, fileSize, fileRowCount, fileModifiedTime, deletionVector, statisticsPredicate, partitionKeys); + return Objects.hash(path, start, length, fileSize, fileRowCount, fileModifiedTime, deletionVector, addresses, statisticsPredicate, partitionKeys); } } diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeSplitManager.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeSplitManager.java index c40818a26305..9350d0b14815 100644 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeSplitManager.java +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeSplitManager.java @@ -18,6 +18,7 @@ import io.airlift.units.DataSize; import io.trino.filesystem.Location; import io.trino.filesystem.TrinoFileSystemFactory; +import io.trino.filesystem.cache.CachingHostAddressProvider; import io.trino.plugin.base.classloader.ClassLoaderSafeConnectorSplitSource; import 
io.trino.plugin.deltalake.functions.tablechanges.TableChangesSplitSource; import io.trino.plugin.deltalake.functions.tablechanges.TableChangesTableFunctionHandle; @@ -85,6 +86,7 @@ public class DeltaLakeSplitManager private final double minimumAssignedSplitWeight; private final TrinoFileSystemFactory fileSystemFactory; private final DeltaLakeTransactionManager deltaLakeTransactionManager; + private final CachingHostAddressProvider cachingHostAddressProvider; @Inject public DeltaLakeSplitManager( @@ -93,7 +95,8 @@ public DeltaLakeSplitManager( ExecutorService executor, DeltaLakeConfig config, TrinoFileSystemFactory fileSystemFactory, - DeltaLakeTransactionManager deltaLakeTransactionManager) + DeltaLakeTransactionManager deltaLakeTransactionManager, + CachingHostAddressProvider cachingHostAddressProvider) { this.typeManager = requireNonNull(typeManager, "typeManager is null"); this.transactionLogAccess = requireNonNull(transactionLogAccess, "transactionLogAccess is null"); @@ -104,6 +107,7 @@ public DeltaLakeSplitManager( this.minimumAssignedSplitWeight = config.getMinimumAssignedSplitWeight(); this.fileSystemFactory = requireNonNull(fileSystemFactory, "fileSystemFactory is null"); this.deltaLakeTransactionManager = requireNonNull(deltaLakeTransactionManager, "deltaLakeTransactionManager is null"); + this.cachingHostAddressProvider = requireNonNull(cachingHostAddressProvider, "cacheHostAddressProvider is null"); } @Override @@ -329,6 +333,7 @@ private List splitsForFile( addFileEntry.getStats().flatMap(DeltaLakeFileStatistics::getNumRecords), addFileEntry.getModificationTime(), addFileEntry.getDeletionVector(), + cachingHostAddressProvider.getHosts(splitPath), SplitWeight.standard(), statisticsPredicate, partitionKeys)); @@ -354,6 +359,7 @@ private List splitsForFile( Optional.empty(), addFileEntry.getModificationTime(), addFileEntry.getDeletionVector(), + cachingHostAddressProvider.getHosts(splitPath), SplitWeight.fromProportion(clamp((double) splitSize / 
maxSplitSize, minimumAssignedSplitWeight, 1.0)), statisticsPredicate, partitionKeys)); diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/cache/DeltaLakeCacheKeyProvider.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/cache/DeltaLakeCacheKeyProvider.java new file mode 100644 index 000000000000..9f62e38e4235 --- /dev/null +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/cache/DeltaLakeCacheKeyProvider.java @@ -0,0 +1,36 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.plugin.deltalake.cache; + +import io.trino.filesystem.TrinoInputFile; +import io.trino.filesystem.cache.CacheKeyProvider; + +import java.util.Optional; + +public class DeltaLakeCacheKeyProvider + implements CacheKeyProvider +{ + /** + * Get the cache key of a TrinoInputFile. Returns Optional.empty() if the file is not cacheable. 
+ */ + @Override + public Optional getCacheKey(TrinoInputFile delegate) + { + // TODO: Consider caching of the Parquet checkpoint files within _delta_log + if (!delegate.location().path().contains("/_delta_log/")) { + return Optional.of(delegate.location().path()); + } + return Optional.empty(); + } +} diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeAlluxioCacheFileOperations.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeAlluxioCacheFileOperations.java new file mode 100644 index 000000000000..6fba7873a6b4 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeAlluxioCacheFileOperations.java @@ -0,0 +1,389 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package io.trino.plugin.deltalake; + +import com.google.common.collect.HashMultiset; +import com.google.common.collect.ImmutableMultiset; +import com.google.common.collect.Multiset; +import io.trino.Session; +import io.trino.filesystem.TrackingFileSystemFactory; +import io.trino.filesystem.TrackingFileSystemFactory.OperationType; +import io.trino.filesystem.TrinoFileSystemFactory; +import io.trino.filesystem.alluxio.AlluxioFileSystemCacheConfig; +import io.trino.filesystem.alluxio.AlluxioFileSystemCacheModule; +import io.trino.filesystem.alluxio.TestingAlluxioFileSystemCache; +import io.trino.filesystem.cache.CacheFileSystemFactory; +import io.trino.filesystem.cache.CachingHostAddressProvider; +import io.trino.filesystem.cache.NoneCachingHostAddressProvider; +import io.trino.filesystem.hdfs.HdfsFileSystemFactory; +import io.trino.plugin.deltalake.cache.DeltaLakeCacheKeyProvider; +import io.trino.testing.AbstractTestQueryFramework; +import io.trino.testing.DistributedQueryRunner; +import org.intellij.lang.annotations.Language; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.parallel.Execution; +import org.junit.jupiter.api.parallel.ExecutionMode; + +import java.io.File; +import java.nio.file.Path; +import java.util.Map; +import java.util.Optional; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import static com.google.inject.Scopes.SINGLETON; +import static io.trino.filesystem.TrackingFileSystemFactory.OperationType.INPUT_FILE_GET_LENGTH; +import static io.trino.filesystem.TrackingFileSystemFactory.OperationType.INPUT_FILE_NEW_STREAM; +import static io.trino.filesystem.alluxio.TestingAlluxioFileSystemCache.OperationType.CACHE_READ; +import static io.trino.filesystem.alluxio.TestingAlluxioFileSystemCache.OperationType.EXTERNAL_READ; +import static io.trino.plugin.base.util.Closables.closeAllSuppress; +import static io.trino.plugin.deltalake.TestDeltaLakeAlluxioCacheFileOperations.FileType.CDF_DATA; +import static 
    /**
     * Builds a single-catalog Delta Lake query runner whose file system is wrapped
     * twice: a {@code TrackingFileSystemFactory} records raw file-system operations,
     * and a {@code TestingAlluxioFileSystemCache} sits on top recording cache
     * hits/misses. Both wrappers are kept in fields so tests can reset and inspect them.
     */
    @Override
    protected DistributedQueryRunner createQueryRunner()
            throws Exception
    {
        Session session = testSessionBuilder()
                .setCatalog("delta_lake")
                .setSchema("default")
                .build();
        DistributedQueryRunner queryRunner = DistributedQueryRunner.builder(session)
                .build();
        try {
            File metastoreDirectory = queryRunner.getCoordinator().getBaseDataDir().resolve("delta_lake_metastore").toFile().getAbsoluteFile();
            // Innermost layer: counts raw HDFS file operations (see getOperations())
            trackingFileSystemFactory = new TrackingFileSystemFactory(new HdfsFileSystemFactory(HDFS_ENVIRONMENT, HDFS_FILE_SYSTEM_STATS));
            // TTL disabled so entries only leave the cache via explicit clear() in tests
            AlluxioFileSystemCacheConfig alluxioFileSystemCacheConfiguration = new AlluxioFileSystemCacheConfig()
                    .setCacheDirectories(metastoreDirectory.getAbsolutePath() + "/cache")
                    .disableTTL()
                    .setMaxCacheSizes("100MB");
            alluxioFileSystemCache = new TestingAlluxioFileSystemCache(AlluxioFileSystemCacheModule.getAlluxioConfiguration(alluxioFileSystemCacheConfiguration), new DeltaLakeCacheKeyProvider());
            TrinoFileSystemFactory fileSystemFactory = new CacheFileSystemFactory(trackingFileSystemFactory, alluxioFileSystemCache, alluxioFileSystemCache.getCacheKeyProvider());

            Path dataDirectory = queryRunner.getCoordinator().getBaseDataDir().resolve("delta_lake_data");
            // NoneCachingHostAddressProvider keeps split scheduling host-agnostic so
            // operation counts are deterministic regardless of node placement
            queryRunner.installPlugin(new TestingDeltaLakePlugin(dataDirectory, Optional.empty(), Optional.of(fileSystemFactory), binder -> binder.bind(CachingHostAddressProvider.class).to(NoneCachingHostAddressProvider.class).in(SINGLETON)));
            queryRunner.createCatalog(
                    "delta_lake",
                    "delta_lake",
                    Map.of(
                            "hive.metastore", "file",
                            "hive.metastore.catalog.dir", metastoreDirectory.toURI().toString(),
                            "delta.enable-non-concurrent-writes", "true"));

            queryRunner.execute("CREATE SCHEMA " + session.getSchema().orElseThrow());
            return queryRunner;
        }
        catch (Throwable e) {
            // Close the runner without masking the original failure
            closeAllSuppress(e, queryRunner);
            throw e;
        }
    }
FileOperation(TRANSACTION_LOG_JSON, "00000000000000000000.json", INPUT_FILE_NEW_STREAM), 1) + .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000001.json", INPUT_FILE_NEW_STREAM), 1) + .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000002.json", INPUT_FILE_NEW_STREAM), 1) + .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000003.json", INPUT_FILE_NEW_STREAM), 1) + .addCopies(new FileOperation(DATA, "key=p1/", INPUT_FILE_NEW_STREAM), 1) + .addCopies(new FileOperation(DATA, "key=p2/", INPUT_FILE_NEW_STREAM), 1) + // All data cached when reading parquet file footers to collect statistics when writing + .build(), + ImmutableMultiset.builder() + .addCopies(new CacheOperation(EXTERNAL_READ, "key=p1/"), 1) + .addCopies(new CacheOperation(EXTERNAL_READ, "key=p2/"), 1) + .build()); + assertFileSystemAccesses( + "SELECT * FROM test_cache_file_operations", + ImmutableMultiset.builder() + .addCopies(new FileOperation(LAST_CHECKPOINT, "_last_checkpoint", INPUT_FILE_NEW_STREAM), 1) + .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000000.json", INPUT_FILE_NEW_STREAM), 1) + .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000001.json", INPUT_FILE_NEW_STREAM), 1) + .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000002.json", INPUT_FILE_NEW_STREAM), 1) + .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000003.json", INPUT_FILE_NEW_STREAM), 1) + .build(), + ImmutableMultiset.builder() + .addCopies(new CacheOperation(CACHE_READ, "key=p1/"), 1) + .addCopies(new CacheOperation(CACHE_READ, "key=p2/"), 1) + .build()); + assertUpdate("INSERT INTO test_cache_file_operations VALUES ('p3', '3-xyz')", 1); + assertUpdate("INSERT INTO test_cache_file_operations VALUES ('p4', '4-xyz')", 1); + assertUpdate("INSERT INTO test_cache_file_operations VALUES ('p5', '5-xyz')", 1); + alluxioFileSystemCache.clear(); + assertFileSystemAccesses( + "SELECT * FROM 
test_cache_file_operations", + ImmutableMultiset.builder() + .addCopies(new FileOperation(LAST_CHECKPOINT, "_last_checkpoint", INPUT_FILE_NEW_STREAM), 1) + .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000000.json", INPUT_FILE_NEW_STREAM), 1) + .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000001.json", INPUT_FILE_NEW_STREAM), 1) + .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000002.json", INPUT_FILE_NEW_STREAM), 1) + .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000003.json", INPUT_FILE_NEW_STREAM), 1) + .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000004.json", INPUT_FILE_NEW_STREAM), 1) + .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000005.json", INPUT_FILE_NEW_STREAM), 1) + .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000006.json", INPUT_FILE_NEW_STREAM), 1) + .addCopies(new FileOperation(DATA, "key=p1/", INPUT_FILE_NEW_STREAM), 1) + .addCopies(new FileOperation(DATA, "key=p2/", INPUT_FILE_NEW_STREAM), 1) + .addCopies(new FileOperation(DATA, "key=p3/", INPUT_FILE_NEW_STREAM), 1) + .addCopies(new FileOperation(DATA, "key=p4/", INPUT_FILE_NEW_STREAM), 1) + .addCopies(new FileOperation(DATA, "key=p5/", INPUT_FILE_NEW_STREAM), 1) + // All data cached when reading parquet file footers to collect statistics when writing + .build(), + ImmutableMultiset.builder() + .addCopies(new CacheOperation(EXTERNAL_READ, "key=p1/"), 1) + .addCopies(new CacheOperation(EXTERNAL_READ, "key=p2/"), 1) + .addCopies(new CacheOperation(EXTERNAL_READ, "key=p3/"), 1) + .addCopies(new CacheOperation(EXTERNAL_READ, "key=p4/"), 1) + .addCopies(new CacheOperation(EXTERNAL_READ, "key=p5/"), 1) + .build()); + assertFileSystemAccesses( + "SELECT * FROM test_cache_file_operations", + ImmutableMultiset.builder() + .addCopies(new FileOperation(LAST_CHECKPOINT, "_last_checkpoint", INPUT_FILE_NEW_STREAM), 1) + .addCopies(new FileOperation(TRANSACTION_LOG_JSON, 
"00000000000000000000.json", INPUT_FILE_NEW_STREAM), 1) + .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000001.json", INPUT_FILE_NEW_STREAM), 1) + .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000002.json", INPUT_FILE_NEW_STREAM), 1) + .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000003.json", INPUT_FILE_NEW_STREAM), 1) + .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000004.json", INPUT_FILE_NEW_STREAM), 1) + .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000005.json", INPUT_FILE_NEW_STREAM), 1) + .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000006.json", INPUT_FILE_NEW_STREAM), 1) + .build(), + ImmutableMultiset.builder() + .addCopies(new CacheOperation(CACHE_READ, "key=p1/"), 1) + .addCopies(new CacheOperation(CACHE_READ, "key=p2/"), 1) + .addCopies(new CacheOperation(CACHE_READ, "key=p3/"), 1) + .addCopies(new CacheOperation(CACHE_READ, "key=p4/"), 1) + .addCopies(new CacheOperation(CACHE_READ, "key=p5/"), 1) + .build()); + } + + @Test + public void testCacheCheckpointFileOperations() + { + assertUpdate("DROP TABLE IF EXISTS test_checkpoint_file_operations"); + assertUpdate("CREATE TABLE test_checkpoint_file_operations(key varchar, data varchar) with (checkpoint_interval = 2, partitioned_by=ARRAY['key'])"); + assertUpdate("INSERT INTO test_checkpoint_file_operations VALUES ('p1', '1-abc')", 1); + assertUpdate("INSERT INTO test_checkpoint_file_operations VALUES ('p2', '2-xyz')", 1); + assertUpdate("CALL system.flush_metadata_cache(schema_name => CURRENT_SCHEMA, table_name => 'test_checkpoint_file_operations')"); + alluxioFileSystemCache.clear(); + assertFileSystemAccesses( + "SELECT * FROM test_checkpoint_file_operations", + ImmutableMultiset.builder() + .addCopies(new FileOperation(LAST_CHECKPOINT, "_last_checkpoint", INPUT_FILE_NEW_STREAM), 1) + .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000003.json", 
INPUT_FILE_NEW_STREAM), 1) + .addCopies(new FileOperation(CHECKPOINT, "00000000000000000002.checkpoint.parquet", INPUT_FILE_NEW_STREAM), 2) + .addCopies(new FileOperation(CHECKPOINT, "00000000000000000002.checkpoint.parquet", INPUT_FILE_GET_LENGTH), 4) + .addCopies(new FileOperation(DATA, "key=p1/", INPUT_FILE_NEW_STREAM), 1) + .addCopies(new FileOperation(DATA, "key=p2/", INPUT_FILE_NEW_STREAM), 1) + // All data cached when reading parquet file footers to collect statistics when writing + .build(), + ImmutableMultiset.builder() + .addCopies(new CacheOperation(EXTERNAL_READ, "key=p1/"), 1) + .addCopies(new CacheOperation(EXTERNAL_READ, "key=p2/"), 1) + .build()); + assertFileSystemAccesses( + "SELECT * FROM test_checkpoint_file_operations", + ImmutableMultiset.builder() + .addCopies(new FileOperation(LAST_CHECKPOINT, "_last_checkpoint", INPUT_FILE_NEW_STREAM), 1) + .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000003.json", INPUT_FILE_NEW_STREAM), 1) + .addCopies(new FileOperation(CHECKPOINT, "00000000000000000002.checkpoint.parquet", INPUT_FILE_NEW_STREAM), 2) + .addCopies(new FileOperation(CHECKPOINT, "00000000000000000002.checkpoint.parquet", INPUT_FILE_GET_LENGTH), 4) + .build(), + ImmutableMultiset.builder() + .addCopies(new CacheOperation(CACHE_READ, "key=p1/"), 1) + .addCopies(new CacheOperation(CACHE_READ, "key=p2/"), 1) + .build()); + assertUpdate("INSERT INTO test_checkpoint_file_operations VALUES ('p3', '3-xyz')", 1); + assertUpdate("INSERT INTO test_checkpoint_file_operations VALUES ('p4', '4-xyz')", 1); + assertUpdate("INSERT INTO test_checkpoint_file_operations VALUES ('p5', '5-xyz')", 1); + alluxioFileSystemCache.clear(); + assertFileSystemAccesses( + "SELECT * FROM test_checkpoint_file_operations", + ImmutableMultiset.builder() + .addCopies(new FileOperation(LAST_CHECKPOINT, "_last_checkpoint", INPUT_FILE_NEW_STREAM), 1) + .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000005.json", INPUT_FILE_NEW_STREAM), 1) + 
.addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000006.json", INPUT_FILE_NEW_STREAM), 1) + .addCopies(new FileOperation(CHECKPOINT, "00000000000000000004.checkpoint.parquet", INPUT_FILE_NEW_STREAM), 2) + .addCopies(new FileOperation(CHECKPOINT, "00000000000000000004.checkpoint.parquet", INPUT_FILE_GET_LENGTH), 4) + .addCopies(new FileOperation(DATA, "key=p1/", INPUT_FILE_NEW_STREAM), 1) + .addCopies(new FileOperation(DATA, "key=p2/", INPUT_FILE_NEW_STREAM), 1) + .addCopies(new FileOperation(DATA, "key=p3/", INPUT_FILE_NEW_STREAM), 1) + .addCopies(new FileOperation(DATA, "key=p4/", INPUT_FILE_NEW_STREAM), 1) + .addCopies(new FileOperation(DATA, "key=p5/", INPUT_FILE_NEW_STREAM), 1) + // All data cached when reading parquet file footers to collect statistics when writing + .build(), + ImmutableMultiset.builder() + .addCopies(new CacheOperation(EXTERNAL_READ, "key=p1/"), 1) + .addCopies(new CacheOperation(EXTERNAL_READ, "key=p2/"), 1) + .addCopies(new CacheOperation(EXTERNAL_READ, "key=p3/"), 1) + .addCopies(new CacheOperation(EXTERNAL_READ, "key=p4/"), 1) + .addCopies(new CacheOperation(EXTERNAL_READ, "key=p5/"), 1) + .build()); + assertFileSystemAccesses( + "SELECT * FROM test_checkpoint_file_operations", + ImmutableMultiset.builder() + .addCopies(new FileOperation(LAST_CHECKPOINT, "_last_checkpoint", INPUT_FILE_NEW_STREAM), 1) + .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000005.json", INPUT_FILE_NEW_STREAM), 1) + .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000006.json", INPUT_FILE_NEW_STREAM), 1) + .addCopies(new FileOperation(CHECKPOINT, "00000000000000000004.checkpoint.parquet", INPUT_FILE_NEW_STREAM), 2) + .addCopies(new FileOperation(CHECKPOINT, "00000000000000000004.checkpoint.parquet", INPUT_FILE_GET_LENGTH), 4) + .build(), + ImmutableMultiset.builder() + .addCopies(new CacheOperation(CACHE_READ, "key=p1/"), 1) + .addCopies(new CacheOperation(CACHE_READ, "key=p2/"), 1) + .addCopies(new 
CacheOperation(CACHE_READ, "key=p3/"), 1) + .addCopies(new CacheOperation(CACHE_READ, "key=p4/"), 1) + .addCopies(new CacheOperation(CACHE_READ, "key=p5/"), 1) + .build()); + } + + private void assertFileSystemAccesses(@Language("SQL") String query, Multiset expectedAccesses, Multiset expectedCacheAccesses) + { + assertUpdate("CALL system.flush_metadata_cache()"); + DistributedQueryRunner queryRunner = getDistributedQueryRunner(); + trackingFileSystemFactory.reset(); + alluxioFileSystemCache.reset(); + queryRunner.executeWithPlan(queryRunner.getDefaultSession(), query); + assertMultisetsEqual(getOperations(), expectedAccesses); + assertMultisetsEqual(getCacheOperations(), expectedCacheAccesses); + } + + private Multiset getCacheOperations() + { + return alluxioFileSystemCache.getOperationCounts() + .entrySet().stream() + .filter(entry -> { + String path = entry.getKey().location().path(); + return !path.endsWith(".trinoSchema") && !path.contains(".trinoPermissions"); + }) + .flatMap(entry -> nCopies((int) entry.getValue().stream().filter(l -> l > 0).count(), CacheOperation.create( + entry.getKey().type(), + entry.getKey().location().path())).stream()) + .collect(toCollection(HashMultiset::create)); + } + + private static Pattern dataFilePattern = Pattern.compile(".*?/(?key=[^/]*/)?(?\\d{8}_\\d{6}_\\d{5}_\\w{5})_(?[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})"); + + private record CacheOperation(TestingAlluxioFileSystemCache.OperationType type, String fileId) + { + public static CacheOperation create(TestingAlluxioFileSystemCache.OperationType operationType, String path) + { + String fileName = path.replaceFirst(".*/", ""); + if (!path.contains("_delta_log") && !path.contains("/.trino")) { + Matcher matcher = dataFilePattern.matcher(path); + if (matcher.matches()) { + return new CacheOperation(operationType, matcher.group("partition")); + } + } + else { + return new CacheOperation(operationType, fileName); + } + throw new 
IllegalArgumentException("File not recognized: " + path); + } + } + + private Multiset getOperations() + { + return trackingFileSystemFactory.getOperationCounts() + .entrySet().stream() + .filter(entry -> { + String path = entry.getKey().location().path(); + return !path.endsWith(".trinoSchema") && !path.contains(".trinoPermissions"); + }) + .flatMap(entry -> nCopies(entry.getValue(), FileOperation.create( + entry.getKey().location().path(), + entry.getKey().operationType())).stream()) + .collect(toCollection(HashMultiset::create)); + } + + private record FileOperation(FileType fileType, String fileId, OperationType operationType) + { + public static FileOperation create(String path, OperationType operationType) + { + String fileName = path.replaceFirst(".*/", ""); + if (path.matches(".*/_delta_log/_last_checkpoint")) { + return new FileOperation(LAST_CHECKPOINT, fileName, operationType); + } + if (path.matches(".*/_delta_log/\\d+\\.json")) { + return new FileOperation(TRANSACTION_LOG_JSON, fileName, operationType); + } + if (path.matches(".*/_delta_log/\\d+\\.checkpoint.parquet")) { + return new FileOperation(CHECKPOINT, fileName, operationType); + } + if (path.matches(".*/_delta_log/_trino_meta/extended_stats.json")) { + return new FileOperation(TRINO_EXTENDED_STATS_JSON, fileName, operationType); + } + if (path.matches(".*/_change_data/.*")) { + Matcher matcher = dataFilePattern.matcher(path); + if (matcher.matches()) { + return new FileOperation(CDF_DATA, matcher.group("partition"), operationType); + } + } + if (!path.contains("_delta_log")) { + Matcher matcher = dataFilePattern.matcher(path); + if (matcher.matches()) { + return new FileOperation(DATA, matcher.group("partition"), operationType); + } + } + throw new IllegalArgumentException("File not recognized: " + path); + } + + public FileOperation + { + requireNonNull(fileType, "fileType is null"); + requireNonNull(fileId, "fileId is null"); + requireNonNull(operationType, "operationType is null"); + } + } + 
+ enum FileType + { + LAST_CHECKPOINT, + TRANSACTION_LOG_JSON, + CHECKPOINT, + TRINO_EXTENDED_STATS_JSON, + DATA, + CDF_DATA, + /**/; + } +} diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeAlluxioCacheMinioAndHmsConnectorSmokeTest.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeAlluxioCacheMinioAndHmsConnectorSmokeTest.java new file mode 100644 index 000000000000..265cf92488d7 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeAlluxioCacheMinioAndHmsConnectorSmokeTest.java @@ -0,0 +1,73 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.plugin.deltalake; + +import com.google.common.collect.ImmutableMap; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; + +import java.io.IOException; +import java.io.UncheckedIOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Comparator; +import java.util.Iterator; +import java.util.Map; +import java.util.stream.Stream; + +/** + * Delta Lake connector smoke test exercising Hive metastore and MinIO storage with Alluxio caching. 
+ */ +public class TestDeltaLakeAlluxioCacheMinioAndHmsConnectorSmokeTest + extends TestDeltaLakeMinioAndHmsConnectorSmokeTest +{ + private Path cacheDirectory; + + @BeforeAll + @Override + public void init() + throws Exception + { + cacheDirectory = Files.createTempDirectory("cache"); + super.init(); + } + + @AfterAll + @Override + public void cleanUp() + { + try (Stream walk = Files.walk(cacheDirectory)) { + Iterator iterator = walk.sorted(Comparator.reverseOrder()).iterator(); + while (iterator.hasNext()) { + Path path = iterator.next(); + Files.delete(path); + } + } + catch (IOException e) { + throw new UncheckedIOException(e); + } + super.cleanUp(); + } + + @Override + protected Map deltaStorageConfiguration() + { + return ImmutableMap.builder() + .putAll(super.deltaStorageConfiguration()) + .put("fs.cache", "alluxio") + .put("fs.cache.directories", cacheDirectory.toAbsolutePath().toString()) + .put("fs.cache.max-sizes", "100MB") + .buildOrThrow(); + } +} diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeFileOperations.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeFileOperations.java index 6a69945d3fb3..3fcfd924e2b8 100644 --- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeFileOperations.java +++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeFileOperations.java @@ -21,6 +21,8 @@ import io.trino.SystemSessionProperties; import io.trino.filesystem.TrackingFileSystemFactory; import io.trino.filesystem.TrackingFileSystemFactory.OperationType; +import io.trino.filesystem.cache.CachingHostAddressProvider; +import io.trino.filesystem.cache.NoneCachingHostAddressProvider; import io.trino.filesystem.hdfs.HdfsFileSystemFactory; import io.trino.plugin.tpch.TpchPlugin; import io.trino.testing.AbstractTestQueryFramework; @@ -42,7 +44,7 @@ import java.util.regex.Pattern; import static com.google.common.base.MoreObjects.firstNonNull; 
-import static com.google.inject.util.Modules.EMPTY_MODULE; +import static com.google.inject.Scopes.SINGLETON; import static io.trino.filesystem.TrackingFileSystemFactory.OperationType.INPUT_FILE_EXISTS; import static io.trino.filesystem.TrackingFileSystemFactory.OperationType.INPUT_FILE_GET_LENGTH; import static io.trino.filesystem.TrackingFileSystemFactory.OperationType.INPUT_FILE_NEW_STREAM; @@ -95,7 +97,7 @@ protected QueryRunner createQueryRunner() queryRunner.createCatalog("tpch", "tpch"); Path dataDirectory = queryRunner.getCoordinator().getBaseDataDir().resolve("delta_lake_data"); - queryRunner.installPlugin(new TestingDeltaLakePlugin(dataDirectory, Optional.empty(), Optional.of(trackingFileSystemFactory), EMPTY_MODULE)); + queryRunner.installPlugin(new TestingDeltaLakePlugin(dataDirectory, Optional.empty(), Optional.of(trackingFileSystemFactory), binder -> binder.bind(CachingHostAddressProvider.class).to(NoneCachingHostAddressProvider.class).in(SINGLETON))); queryRunner.createCatalog( "delta_lake", "delta_lake", diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeMetadata.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeMetadata.java index 08f813df74e0..231cd4bdf220 100644 --- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeMetadata.java +++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeMetadata.java @@ -24,6 +24,8 @@ import io.airlift.json.JsonModule; import io.opentelemetry.api.trace.Tracer; import io.trino.filesystem.TrinoFileSystemFactory; +import io.trino.filesystem.cache.CachingHostAddressProvider; +import io.trino.filesystem.cache.NoneCachingHostAddressProvider; import io.trino.filesystem.hdfs.HdfsFileSystemFactory; import io.trino.hdfs.HdfsEnvironment; import io.trino.hdfs.TrinoHdfsFileSystemStats; @@ -205,6 +207,7 @@ public void setUp() binder.bind(HdfsEnvironment.class).toInstance(HDFS_ENVIRONMENT); 
binder.bind(TrinoHdfsFileSystemStats.class).toInstance(HDFS_FILE_SYSTEM_STATS); binder.bind(TrinoFileSystemFactory.class).to(HdfsFileSystemFactory.class).in(Scopes.SINGLETON); + binder.bind(CachingHostAddressProvider.class).to(NoneCachingHostAddressProvider.class).in(Scopes.SINGLETON); }, new AbstractModule() { diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeSplitManager.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeSplitManager.java index fdadc7a134b3..48931352867b 100644 --- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeSplitManager.java +++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeSplitManager.java @@ -20,6 +20,7 @@ import io.airlift.json.JsonCodecFactory; import io.airlift.units.DataSize; import io.trino.filesystem.Location; +import io.trino.filesystem.cache.NoneCachingHostAddressProvider; import io.trino.filesystem.hdfs.HdfsFileSystemFactory; import io.trino.filesystem.memory.MemoryFileSystemFactory; import io.trino.plugin.deltalake.statistics.CachingExtendedStatisticsAccess; @@ -239,7 +240,8 @@ public Stream getActiveFiles( MoreExecutors.newDirectExecutorService(), deltaLakeConfig, HDFS_FILE_SYSTEM_FACTORY, - deltaLakeTransactionManager); + deltaLakeTransactionManager, + new NoneCachingHostAddressProvider()); } private AddFileEntry addFileEntryOfSize(long fileSize) diff --git a/plugin/trino-iceberg/pom.xml b/plugin/trino-iceberg/pom.xml index 70f003c2bdb8..0d56ce3042af 100644 --- a/plugin/trino-iceberg/pom.xml +++ b/plugin/trino-iceberg/pom.xml @@ -362,6 +362,12 @@ runtime + + org.eclipse.jetty.toolchain + jetty-jakarta-servlet-api + runtime + + org.jetbrains annotations @@ -534,12 +540,6 @@ test - - org.eclipse.jetty.toolchain - jetty-jakarta-servlet-api - test - - org.junit.jupiter junit-jupiter-api diff --git a/pom.xml b/pom.xml index 86da560f4be0..371dc2ce538e 100644 --- a/pom.xml +++ b/pom.xml @@ -43,6 
+43,7 @@ lib/trino-cache lib/trino-filesystem lib/trino-filesystem-azure + lib/trino-filesystem-cache-alluxio lib/trino-filesystem-gcs lib/trino-filesystem-manager lib/trino-filesystem-s3 @@ -176,6 +177,7 @@ 2.7.7-1 1.10.2 240 + 307 4.13.1 1.11.3 1.12.643 @@ -643,6 +645,12 @@ ${dep.wire.version} + + commons-cli + commons-cli + 1.6.0 + + commons-codec commons-codec @@ -1094,6 +1102,19 @@ ${project.version} + + io.trino + trino-filesystem-cache-alluxio + ${project.version} + + + + io.trino + trino-filesystem-cache-alluxio + ${project.version} + test-jar + + io.trino trino-filesystem-gcs @@ -1136,6 +1157,13 @@ ${project.version} + + io.trino + trino-hdfs + ${project.version} + test-jar + + io.trino trino-hive @@ -1668,6 +1696,122 @@ 2.3 + + org.alluxio + alluxio-core-client-fs + ${dep.alluxio.version} + + + io.grpc + grpc-core + + + io.grpc + grpc-stub + + + org.alluxio + alluxio-core-common + + + org.alluxio + alluxio-core-transport + + + org.apache.logging.log4j + log4j-core + + + + + + org.alluxio + alluxio-core-common + ${dep.alluxio.version} + + + commons-logging + commons-logging + + + io.etcd + jetcd-core + + + io.grpc + grpc-api + + + io.grpc + grpc-core + + + io.grpc + grpc-netty + + + io.grpc + grpc-services + + + io.grpc + grpc-stub + + + io.netty + netty-tcnative-boringssl-static + + + jakarta.servlet + jakarta.servlet-api + + + org.alluxio + alluxio-core-common + + + org.apache.curator + curator-client + + + org.apache.curator + curator-framework + + + org.apache.logging.log4j + log4j-core + + + org.apache.zookeeper + zookeeper + + + + + + org.alluxio + alluxio-shaded-client + 2.9.3 + + + commons-logging + commons-logging + + + log4j + log4j + + + org.slf4j + slf4j-api + + + org.slf4j + slf4j-log4j12 + + + + org.antlr antlr4-runtime @@ -2313,6 +2457,50 @@ mime.types + + + + org.alluxio + alluxio-core-client-fs + + + org.alluxio + alluxio-core-common + + + org.alluxio + alluxio-core-transport + + + + git.properties + + + + + + + com.qubole.rubix + 
rubix-presto-shaded + + + com.github.ishugaliy + allgood-consistent-hash + + + io.dropwizard.metrics + metrics-core + + + io.dropwizard.metrics + metrics-jvm + + + net.openhft + zero-allocation-hashing + + + diff --git a/testing/trino-faulttolerant-tests/pom.xml b/testing/trino-faulttolerant-tests/pom.xml index 7ceb0c9992bf..d0e5fe3800a7 100644 --- a/testing/trino-faulttolerant-tests/pom.xml +++ b/testing/trino-faulttolerant-tests/pom.xml @@ -185,6 +185,12 @@ io.trino trino-hdfs test + + + com.qubole.rubix + rubix-presto-shaded + + diff --git a/testing/trino-product-tests-groups/src/main/java/io/trino/tests/product/TestGroups.java b/testing/trino-product-tests-groups/src/main/java/io/trino/tests/product/TestGroups.java index 3b271972ed16..dfdbc6bd95a9 100644 --- a/testing/trino-product-tests-groups/src/main/java/io/trino/tests/product/TestGroups.java +++ b/testing/trino-product-tests-groups/src/main/java/io/trino/tests/product/TestGroups.java @@ -92,6 +92,7 @@ public final class TestGroups public static final String DELTA_LAKE_DATABRICKS_113 = "delta-lake-databricks-113"; public static final String DELTA_LAKE_DATABRICKS_122 = "delta-lake-databricks-122"; public static final String DELTA_LAKE_EXCLUDE_91 = "delta-lake-exclude-91"; + public static final String DELTA_LAKE_ALLUXIO_CACHING = "delta-lake-alluxio-caching"; public static final String HUDI = "hudi"; public static final String PARQUET = "parquet"; public static final String IGNITE = "ignite"; diff --git a/testing/trino-product-tests-launcher/src/main/java/io/trino/tests/product/launcher/env/environment/EnvMultinodeMinioDataLakeCaching.java b/testing/trino-product-tests-launcher/src/main/java/io/trino/tests/product/launcher/env/environment/EnvMultinodeMinioDataLakeCaching.java new file mode 100644 index 000000000000..ad70f50d8340 --- /dev/null +++ b/testing/trino-product-tests-launcher/src/main/java/io/trino/tests/product/launcher/env/environment/EnvMultinodeMinioDataLakeCaching.java @@ -0,0 +1,51 @@ +/* + * 
Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.tests.product.launcher.env.environment; + +import com.google.common.collect.ImmutableMap; +import com.google.inject.Inject; +import io.trino.tests.product.launcher.docker.DockerFiles; +import io.trino.tests.product.launcher.env.Environment; +import io.trino.tests.product.launcher.env.EnvironmentProvider; +import io.trino.tests.product.launcher.env.common.Hadoop; +import io.trino.tests.product.launcher.env.common.Minio; +import io.trino.tests.product.launcher.env.common.StandardMultinode; +import io.trino.tests.product.launcher.env.common.TestsEnvironment; + +import static io.trino.tests.product.launcher.env.common.Standard.CONTAINER_TRINO_ETC; +import static org.testcontainers.utility.MountableFile.forHostPath; + +@TestsEnvironment +public final class EnvMultinodeMinioDataLakeCaching + extends EnvironmentProvider +{ + private static final String CONTAINER_TRINO_DELTA_LAKE_PROPERTIES = CONTAINER_TRINO_ETC + "/catalog/delta.properties"; + private static final String CONTAINER_TRINO_DELTA_LAKE_NON_CACHED_PROPERTIES = CONTAINER_TRINO_ETC + "/catalog/delta_non_cached.properties"; + private final DockerFiles.ResourceProvider configDir; + + @Inject + public EnvMultinodeMinioDataLakeCaching(StandardMultinode standardMultinode, Hadoop hadoop, Minio minio, DockerFiles dockerFiles) + { + super(standardMultinode, hadoop, minio); + this.configDir = dockerFiles.getDockerFilesHostDirectory("conf/environment"); 
+ } + + @Override + public void extendEnvironment(Environment.Builder builder) + { + builder.addConnector("delta_lake", forHostPath(configDir.getPath("multinode-minio-data-lake/delta.properties")), CONTAINER_TRINO_DELTA_LAKE_NON_CACHED_PROPERTIES); + builder.addConnector("delta_lake", forHostPath(configDir.getPath("multinode-minio-data-lake-cached/delta.properties")), CONTAINER_TRINO_DELTA_LAKE_PROPERTIES); + builder.configureContainers(container -> container.withTmpFs(ImmutableMap.of("/tmp/cache", "rw"))); + } +} diff --git a/testing/trino-product-tests-launcher/src/main/java/io/trino/tests/product/launcher/suite/suites/SuiteDeltaLakeOss.java b/testing/trino-product-tests-launcher/src/main/java/io/trino/tests/product/launcher/suite/suites/SuiteDeltaLakeOss.java index fc3e0e47a30e..73f6e36f9eb3 100644 --- a/testing/trino-product-tests-launcher/src/main/java/io/trino/tests/product/launcher/suite/suites/SuiteDeltaLakeOss.java +++ b/testing/trino-product-tests-launcher/src/main/java/io/trino/tests/product/launcher/suite/suites/SuiteDeltaLakeOss.java @@ -16,6 +16,7 @@ import com.google.common.collect.ImmutableList; import io.trino.tests.product.launcher.env.EnvironmentConfig; import io.trino.tests.product.launcher.env.environment.EnvMultinodeMinioDataLake; +import io.trino.tests.product.launcher.env.environment.EnvMultinodeMinioDataLakeCaching; import io.trino.tests.product.launcher.env.environment.EnvSinglenodeDeltaLakeKerberizedHdfs; import io.trino.tests.product.launcher.env.environment.EnvSinglenodeDeltaLakeOss; import io.trino.tests.product.launcher.suite.Suite; @@ -24,6 +25,7 @@ import java.util.List; import static io.trino.tests.product.TestGroups.CONFIGURED_FEATURES; +import static io.trino.tests.product.TestGroups.DELTA_LAKE_ALLUXIO_CACHING; import static io.trino.tests.product.TestGroups.DELTA_LAKE_HDFS; import static io.trino.tests.product.TestGroups.DELTA_LAKE_MINIO; import static io.trino.tests.product.TestGroups.DELTA_LAKE_OSS; @@ -48,6 +50,10 @@ public 
List getTestRuns(EnvironmentConfig config) // TODO: make the list of tests run here as close to those run on SinglenodeDeltaLakeDatabricks // e.g. replace `delta-lake-oss` group with `delta-lake-databricks` + any exclusions, of needed .withGroups(CONFIGURED_FEATURES, DELTA_LAKE_OSS) + .build(), + + testOnEnvironment(EnvMultinodeMinioDataLakeCaching.class) + .withGroups(CONFIGURED_FEATURES, DELTA_LAKE_ALLUXIO_CACHING) .build()); } } diff --git a/testing/trino-product-tests-launcher/src/main/resources/docker/presto-product-tests/conf/environment/multinode-minio-data-lake-cached/delta.properties b/testing/trino-product-tests-launcher/src/main/resources/docker/presto-product-tests/conf/environment/multinode-minio-data-lake-cached/delta.properties new file mode 100644 index 000000000000..6b1da12198eb --- /dev/null +++ b/testing/trino-product-tests-launcher/src/main/resources/docker/presto-product-tests/conf/environment/multinode-minio-data-lake-cached/delta.properties @@ -0,0 +1,11 @@ +connector.name=delta_lake +hive.metastore.uri=thrift://hadoop-master:9083 +hive.s3.aws-access-key=minio-access-key +hive.s3.aws-secret-key=minio-secret-key +hive.s3.endpoint=http://minio:9080/ +hive.s3.path-style-access=true +hive.s3.ssl.enabled=false +delta.register-table-procedure.enabled=true +fs.cache=alluxio +fs.cache.directories=/tmp/cache/delta +fs.cache.max-disk-usage-percentages=90 diff --git a/testing/trino-product-tests/src/main/java/io/trino/tests/product/deltalake/TestDeltaLakeAlluxioCaching.java b/testing/trino-product-tests/src/main/java/io/trino/tests/product/deltalake/TestDeltaLakeAlluxioCaching.java new file mode 100644 index 000000000000..a098fe8c334d --- /dev/null +++ b/testing/trino-product-tests/src/main/java/io/trino/tests/product/deltalake/TestDeltaLakeAlluxioCaching.java @@ -0,0 +1,91 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.tests.product.deltalake; + +import io.airlift.units.Duration; +import io.trino.tempto.ProductTest; +import io.trino.tests.product.deltalake.util.CachingTestUtils.CacheStats; +import org.testng.annotations.Test; + +import static io.airlift.testing.Assertions.assertGreaterThan; +import static io.airlift.testing.Assertions.assertGreaterThanOrEqual; +import static io.trino.tests.product.TestGroups.DELTA_LAKE_ALLUXIO_CACHING; +import static io.trino.tests.product.TestGroups.PROFILE_SPECIFIC_TESTS; +import static io.trino.tests.product.deltalake.util.CachingTestUtils.getCacheStats; +import static io.trino.tests.product.utils.QueryAssertions.assertEventually; +import static io.trino.tests.product.utils.QueryExecutors.onTrino; +import static java.util.concurrent.TimeUnit.SECONDS; +import static org.assertj.core.api.Assertions.assertThat; +import static org.testng.Assert.assertEquals; + +public class TestDeltaLakeAlluxioCaching + extends ProductTest +{ + @Test(groups = {DELTA_LAKE_ALLUXIO_CACHING, PROFILE_SPECIFIC_TESTS}) + public void testReadFromCache() + { + testReadFromTable("table1"); + testReadFromTable("table2"); + } + + private void testReadFromTable(String tableNameSuffix) + { + String cachedTableName = "delta.default.test_cache_read" + tableNameSuffix; + String nonCachedTableName = "delta_non_cached.default.test_cache_read" + tableNameSuffix; + + createTestTable(cachedTableName); + + CacheStats beforeCacheStats = getCacheStats("delta"); + + long tableSize = (Long) onTrino().executeQuery("SELECT SUM(size) as size FROM (SELECT 
\"$path\", \"$file_size\" AS size FROM " + nonCachedTableName + " GROUP BY 1, 2)").getOnlyValue(); + + assertThat(onTrino().executeQuery("SELECT * FROM " + cachedTableName)).hasAnyRows(); + + assertEventually( + new Duration(20, SECONDS), + () -> { + // first query via caching catalog should fetch external data + CacheStats afterQueryCacheStats = getCacheStats("delta"); + assertGreaterThanOrEqual(afterQueryCacheStats.cacheSpaceUsed(), beforeCacheStats.cacheSpaceUsed() + tableSize); + assertGreaterThan(afterQueryCacheStats.externalReads(), beforeCacheStats.externalReads()); + assertGreaterThanOrEqual(afterQueryCacheStats.cacheReads(), beforeCacheStats.cacheReads()); + }); + + assertEventually( + new Duration(10, SECONDS), + () -> { + CacheStats beforeQueryCacheStats = getCacheStats("delta"); + + assertThat(onTrino().executeQuery("SELECT * FROM " + cachedTableName)).hasAnyRows(); + + // query via caching catalog should read exclusively from cache + CacheStats afterQueryCacheStats = getCacheStats("delta"); + assertGreaterThan(afterQueryCacheStats.cacheReads(), beforeQueryCacheStats.cacheReads()); + assertEquals(afterQueryCacheStats.externalReads(), beforeQueryCacheStats.externalReads()); + assertEquals(afterQueryCacheStats.cacheSpaceUsed(), beforeQueryCacheStats.cacheSpaceUsed()); + }); + + onTrino().executeQuery("DROP TABLE " + nonCachedTableName); + } + + /** + * Creates a table which should contain around six parquet files of roughly 2 MB each + */ + private void createTestTable(String tableName) + { + onTrino().executeQuery("DROP TABLE IF EXISTS " + tableName); + onTrino().executeQuery("SET SESSION delta.target_max_file_size = '2MB'"); + onTrino().executeQuery("CREATE TABLE " + tableName + " AS SELECT * FROM tpch.sf1.customer"); + } +} diff --git a/testing/trino-product-tests/src/main/java/io/trino/tests/product/deltalake/util/CachingTestUtils.java b/testing/trino-product-tests/src/main/java/io/trino/tests/product/deltalake/util/CachingTestUtils.java new file mode 100644 index 
000000000000..39e73ec24e83 --- /dev/null +++ b/testing/trino-product-tests/src/main/java/io/trino/tests/product/deltalake/util/CachingTestUtils.java @@ -0,0 +1,45 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.tests.product.deltalake.util; + +import io.trino.tempto.query.QueryResult; + +import static com.google.common.collect.Iterables.getOnlyElement; +import static io.trino.tests.product.utils.QueryExecutors.onTrino; + +public final class CachingTestUtils +{ + private CachingTestUtils() {} + + public static CacheStats getCacheStats(String catalog) + { + QueryResult queryResult = onTrino().executeQuery("SELECT " + + " sum(\"cachereads.alltime.count\") as cachereads, " + + " sum(\"externalreads.alltime.count\") as externalreads " + + "FROM jmx.current.\"io.trino.filesystem.alluxio:name=" + catalog + ",type=alluxiocachestats\";"); + + double cacheReads = (Double) getOnlyElement(queryResult.rows()) + .get(queryResult.tryFindColumnIndex("cachereads").get() - 1); + + double externalReads = (Double) getOnlyElement(queryResult.rows()) + .get(queryResult.tryFindColumnIndex("externalreads").get() - 1); + + long cacheSpaceUsed = (Long) onTrino().executeQuery("SELECT sum(count) FROM " + + "jmx.current.\"org.alluxio:name=client.cachespaceusedcount,type=counters\"").getOnlyValue(); + + return new CacheStats(cacheReads, externalReads, cacheSpaceUsed); + } + + public record CacheStats(double cacheReads, double externalReads, long cacheSpaceUsed) 
{} +}