Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@
import static com.google.common.collect.ImmutableList.toImmutableList;
import static com.google.common.collect.ImmutableMap.toImmutableMap;
import static com.google.common.collect.ImmutableSet.toImmutableSet;
import static io.airlift.slice.Slices.utf8Slice;
import static io.trino.plugin.deltalake.DeltaLakeColumnHandle.pathColumnHandle;
import static io.trino.plugin.deltalake.DeltaLakeMetadata.createStatisticsPredicate;
import static io.trino.plugin.deltalake.DeltaLakeSessionProperties.getDynamicFilteringWaitTimeout;
import static io.trino.plugin.deltalake.DeltaLakeSessionProperties.getMaxInitialSplitSize;
Expand Down Expand Up @@ -131,6 +133,7 @@ private Stream<DeltaLakeSplit> getSplits(
List<AddFileEntry> validDataFiles = metastore.getValidDataFiles(tableHandle.getSchemaTableName(), session);
TupleDomain<DeltaLakeColumnHandle> enforcedPartitionConstraint = tableHandle.getEnforcedPartitionConstraint();
TupleDomain<DeltaLakeColumnHandle> nonPartitionConstraint = tableHandle.getNonPartitionConstraint();
Domain pathDomain = getPathDomain(nonPartitionConstraint);

// Delta Lake handles updates and deletes by copying entire data files, minus updates/deletes. Because of this we can only have one Split/UpdatablePageSource
// per file.
Expand All @@ -157,6 +160,11 @@ private Stream<DeltaLakeSplit> getSplits(
return Stream.empty();
}

String splitPath = buildSplitPath(tableLocation, addAction);
Comment thread
ebyhr marked this conversation as resolved.
Outdated
if (!pathMatchesPredicate(pathDomain, splitPath)) {
return Stream.empty();
}

if (filesModifiedAfter.isPresent() && addAction.getModificationTime() <= filesModifiedAfter.get().toEpochMilli()) {
return Stream.empty();
}
Expand Down Expand Up @@ -194,7 +202,7 @@ private Stream<DeltaLakeSplit> getSplits(
return splitsForFile(
session,
addAction,
tableLocation,
splitPath,
addAction.getCanonicalPartitionValues(),
statisticsPredicate,
splittable,
Expand All @@ -215,17 +223,28 @@ public static boolean partitionMatchesPredicate(Map<String, Optional<String>> pa
return true;
}

/**
 * Extracts the domain that the given predicate places on the synthetic path column.
 *
 * @param effectivePredicate predicate to inspect
 * @return the domain registered for the path column handle, or an unconstrained
 *         ({@code Domain.all}) domain of the path column's type when the predicate
 *         exposes no domain map or the map has no entry for the path column
 */
private static Domain getPathDomain(TupleDomain<DeltaLakeColumnHandle> effectivePredicate)
{
    DeltaLakeColumnHandle pathColumn = pathColumnHandle();
    // Fallback used both when no domain map is available and when the path column is absent from it
    Domain unconstrained = Domain.all(pathColumn.getType());

    Optional<Map<DeltaLakeColumnHandle, Domain>> columnDomains = effectivePredicate.getDomains();
    if (!columnDomains.isPresent()) {
        return unconstrained;
    }

    Domain pathDomain = columnDomains.get().get(pathColumn);
    if (pathDomain == null) {
        return unconstrained;
    }
    return pathDomain;
}

/**
 * Tests whether a file path is admitted by the domain on the path column.
 *
 * @param pathDomain domain to test against
 * @param path file path; it is converted to a UTF-8 slice before the domain check
 * @return the result of {@code pathDomain.includesNullableValue} for the encoded path
 */
private static boolean pathMatchesPredicate(Domain pathDomain, String path)
{
    boolean pathIncluded = pathDomain.includesNullableValue(utf8Slice(path));
    return pathIncluded;
}

private List<DeltaLakeSplit> splitsForFile(
ConnectorSession session,
AddFileEntry addFileEntry,
String tableLocation,
String splitPath,
Map<String, Optional<String>> partitionKeys,
TupleDomain<DeltaLakeColumnHandle> statisticsPredicate,
boolean splittable,
AtomicInteger remainingInitialSplits)
{
long fileSize = addFileEntry.getSize();
String splitPath = buildSplitPath(tableLocation, addFileEntry);

if (!splittable) {
// remainingInitialSplits is not used when !splittable
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -494,6 +494,24 @@ public void testTargetMaxFileSize()
}
}

/**
 * Verifies that predicates on the hidden {@code "$path"} column return the expected rows.
 * Two rows are written by two separate INSERT statements, the {@code "$path"} value of each
 * row is captured, and equality, inequality, IN, IS NOT NULL and IS NULL predicates on
 * {@code "$path"} are then checked against the expected {@code x} values.
 */
@Test
public void testPathColumn()
{
try (TestTable table = new TestTable(getQueryRunner()::execute, "test_path_column", "(x VARCHAR)")) {
assertUpdate("INSERT INTO " + table.getName() + " SELECT 'first'", 1);
// Queried right after the first insert, so no filter on x is applied here
String firstFilePath = (String) computeScalar("SELECT \"$path\" FROM " + table.getName());
assertUpdate("INSERT INTO " + table.getName() + " SELECT 'second'", 1);
// The table now holds both rows, so the lookup is narrowed to the second one
String secondFilePath = (String) computeScalar("SELECT \"$path\" FROM " + table.getName() + " WHERE x = 'second'");

// Verify predicate correctness on $path column
// NOTE(review): the expectations below presume each INSERT wrote its row to a distinct file — confirm
assertQuery("SELECT x FROM " + table.getName() + " WHERE \"$path\" = '" + firstFilePath + "'", "VALUES 'first'");
assertQuery("SELECT x FROM " + table.getName() + " WHERE \"$path\" <> '" + firstFilePath + "'", "VALUES 'second'");
assertQuery("SELECT x FROM " + table.getName() + " WHERE \"$path\" IN ('" + firstFilePath + "', '" + secondFilePath + "')", "VALUES ('first'), ('second')");
assertQuery("SELECT x FROM " + table.getName() + " WHERE \"$path\" IS NOT NULL", "VALUES ('first'), ('second')");
assertQueryReturnsEmptyResult("SELECT x FROM " + table.getName() + " WHERE \"$path\" IS NULL");
Comment thread
ebyhr marked this conversation as resolved.
Outdated
}
}

@Override
protected String createSchemaSql(String schemaName)
{
Expand Down