Skip to content

Commit 04aceb0

Browse files
committed
Improve HashBuilderOperator unspill parallelism
HashBuilderOperator is unspilling sequentially partition by partition. This commit improves join unspilling performance by making FileSingleStreamSpiller unspill single partition in parallel. FileSingleStreamSpiller is enhanced so it can spill to/unspill from multiple files.
1 parent 7acaf11 commit 04aceb0

File tree

11 files changed

+306
-48
lines changed

11 files changed

+306
-48
lines changed

core/trino-main/src/main/java/io/trino/operator/join/HashBuilderOperator.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -411,7 +411,8 @@ private ListenableFuture<Void> spillIndex()
411411
spiller = Optional.of(singleStreamSpillerFactory.create(
412412
index.getTypes(),
413413
operatorContext.getSpillContext().newLocalSpillContext(),
414-
operatorContext.newLocalUserMemoryContext(HashBuilderOperator.class.getSimpleName())));
414+
operatorContext.newLocalUserMemoryContext(HashBuilderOperator.class.getSimpleName()),
415+
true));
415416
long spillStartNanos = System.nanoTime();
416417
ListenableFuture<DataSize> spillFuture = getSpiller().spill(index.getPages());
417418
addSuccessCallback(spillFuture, dataSize -> {

core/trino-main/src/main/java/io/trino/spiller/FileSingleStreamSpiller.java

Lines changed: 129 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -42,13 +42,15 @@
4242
import java.io.UncheckedIOException;
4343
import java.nio.file.Files;
4444
import java.nio.file.Path;
45+
import java.util.ArrayList;
4546
import java.util.Iterator;
4647
import java.util.List;
4748
import java.util.Optional;
4849
import java.util.concurrent.atomic.AtomicBoolean;
4950
import java.util.concurrent.atomic.AtomicLong;
5051

5152
import static com.google.common.base.Preconditions.checkState;
53+
import static com.google.common.collect.Iterators.transform;
5254
import static com.google.common.util.concurrent.Futures.immediateFuture;
5355
import static io.trino.spi.StandardErrorCode.GENERIC_INTERNAL_ERROR;
5456
import static io.trino.spiller.FileSingleStreamSpillerFactory.SPILL_FILE_PREFIX;
@@ -63,7 +65,9 @@ public class FileSingleStreamSpiller
6365
@VisibleForTesting
6466
static final int BUFFER_SIZE = 4 * 1024;
6567

66-
private final FileHolder targetFile;
68+
private final List<FileHolder> targetFiles;
69+
private volatile int currentFileIndex;
70+
6771
private final Closer closer = Closer.create();
6872
private final PagesSerdeFactory serdeFactory;
6973
private volatile Optional<SecretKey> encryptionKey;
@@ -84,7 +88,7 @@ public FileSingleStreamSpiller(
8488
PagesSerdeFactory serdeFactory,
8589
Optional<SecretKey> encryptionKey,
8690
ListeningExecutorService executor,
87-
Path spillPath,
91+
List<Path> spillPaths,
8892
SpillerStats spillerStats,
8993
SpillContext spillContext,
9094
LocalMemoryContext memoryContext,
@@ -109,8 +113,14 @@ public FileSingleStreamSpiller(
109113
// middle of execution when close() is called (note that this applies to both readPages() and writePages() methods).
110114
this.memoryContext.setBytes(BUFFER_SIZE);
111115
this.fileSystemErrorHandler = requireNonNull(fileSystemErrorHandler, "filesystemErrorHandler is null");
116+
requireNonNull(spillPaths, "spillPaths is null");
117+
checkState(!spillPaths.isEmpty(), "spillPaths is empty");
112118
try {
113-
this.targetFile = closer.register(new FileHolder(Files.createTempFile(spillPath, SPILL_FILE_PREFIX, SPILL_FILE_SUFFIX)));
119+
ImmutableList.Builder<FileHolder> builder = ImmutableList.builder();
120+
for (Path path : spillPaths) {
121+
builder.add(closer.register(new FileHolder(Files.createTempFile(path, SPILL_FILE_PREFIX, SPILL_FILE_SUFFIX))));
122+
}
123+
this.targetFiles = builder.build();
114124
}
115125
catch (IOException e) {
116126
this.fileSystemErrorHandler.run();
@@ -137,61 +147,129 @@ public long getSpilledPagesInMemorySize()
137147
public Iterator<Page> getSpilledPages()
138148
{
139149
checkNoSpillInProgress();
140-
return readPages();
150+
checkState(writable.getAndSet(false), "Repeated reads are disallowed to prevent potential resource leaks");
151+
152+
try {
153+
Optional<SecretKey> encryptionKey = this.encryptionKey;
154+
checkState(encrypted == encryptionKey.isPresent(), "encryptionKey has been discarded");
155+
PageDeserializer deserializer = serdeFactory.createDeserializer(encryptionKey);
156+
this.encryptionKey = Optional.empty();
157+
158+
int fileCount = targetFiles.size();
159+
List<Iterator<Page>> iterators = new ArrayList<>(targetFiles.size());
160+
for (FileHolder file : targetFiles) {
161+
iterators.add(readFilePages(deserializer, file, closer));
162+
}
163+
164+
return new AbstractIterator<>()
165+
{
166+
int fileIndex;
167+
168+
@Override
169+
protected Page computeNext()
170+
{
171+
Iterator<Page> iterator = iterators.get(fileIndex);
172+
if (!iterator.hasNext()) {
173+
return endOfData();
174+
}
175+
176+
Page page = iterator.next();
177+
fileIndex = (fileIndex + 1) % fileCount;
178+
return page;
179+
}
180+
};
181+
}
182+
catch (IOException e) {
183+
fileSystemErrorHandler.run();
184+
throw new TrinoException(GENERIC_INTERNAL_ERROR, "Failed to read spilled pages", e);
185+
}
141186
}
142187

143188
@Override
144189
public ListenableFuture<List<Page>> getAllSpilledPages()
145190
{
146-
return executor.submit(() -> ImmutableList.copyOf(getSpilledPages()));
191+
checkNoSpillInProgress();
192+
checkState(writable.getAndSet(false), "Repeated reads are disallowed to prevent potential resource leaks");
193+
194+
Optional<SecretKey> encryptionKey = this.encryptionKey;
195+
checkState(encrypted == encryptionKey.isPresent(), "encryptionKey has been discarded");
196+
this.encryptionKey = Optional.empty();
197+
198+
List<ListenableFuture<List<Page>>> futures = new ArrayList<>();
199+
for (FileHolder file : targetFiles) {
200+
futures.add(executor.submit(() -> {
201+
PageDeserializer deserializer = serdeFactory.createDeserializer(encryptionKey);
202+
ImmutableList.Builder<Page> pages = ImmutableList.builder();
203+
try (Closer closer = Closer.create()) {
204+
readFilePages(deserializer, file, closer).forEachRemaining(pages::add);
205+
}
206+
return pages.build();
207+
}));
208+
}
209+
210+
// Combine pages from all spill files according to the round-robin order.
211+
return Futures.transform(Futures.allAsList(futures), pagesPerFile -> {
212+
ImmutableList.Builder<Page> builder = ImmutableList.builder();
213+
int fileCount = targetFiles.size();
214+
215+
List<Iterator<Page>> iterators = new ArrayList<>(fileCount);
216+
for (List<Page> pages : pagesPerFile) {
217+
iterators.add(pages.iterator());
218+
}
219+
220+
int fileIndex = 0;
221+
while (true) {
222+
Iterator<Page> iterator = iterators.get(fileIndex);
223+
if (!iterator.hasNext()) {
224+
break;
225+
}
226+
builder.add(iterator.next());
227+
fileIndex = (fileIndex + 1) % fileCount;
228+
}
229+
return builder.build();
230+
}, executor);
147231
}
148232

149-
private DataSize writePages(Iterator<Page> pageIterator)
233+
private DataSize writePages(Iterator<Page> pages)
150234
{
151235
checkState(writable.get(), "Spilling no longer allowed. The spiller has been made non-writable on first read for subsequent reads to be consistent");
152236

153237
Optional<SecretKey> encryptionKey = this.encryptionKey;
154238
checkState(encrypted == encryptionKey.isPresent(), "encryptionKey has been discarded");
155239
PageSerializer serializer = serdeFactory.createSerializer(encryptionKey);
240+
156241
long spilledPagesBytes = 0;
157-
try (SliceOutput output = new OutputStreamSliceOutput(targetFile.newOutputStream(APPEND), BUFFER_SIZE)) {
158-
while (pageIterator.hasNext()) {
159-
Page page = pageIterator.next();
242+
int fileIndex = currentFileIndex;
243+
int fileCount = targetFiles.size();
244+
245+
try {
246+
while (pages.hasNext()) {
247+
Page page = pages.next();
160248
long pageSizeInBytes = page.getSizeInBytes();
249+
Slice serialized = serializer.serialize(page);
250+
long serializedPageSize = serialized.length();
251+
252+
try (SliceOutput out = newSliceOutput(fileIndex)) {
253+
out.writeBytes(serialized);
254+
}
255+
161256
spilledPagesBytes += pageSizeInBytes;
257+
162258
spilledPagesInMemorySize.addAndGet(pageSizeInBytes);
163-
Slice serializedPage = serializer.serialize(page);
164-
long pageSize = serializedPage.length();
165-
localSpillContext.updateBytes(pageSize);
166-
spillerStats.addToTotalSpilledBytes(pageSize);
167-
output.writeBytes(serializedPage);
259+
localSpillContext.updateBytes(serializedPageSize);
260+
spillerStats.addToTotalSpilledBytes(serializedPageSize);
261+
262+
fileIndex = (fileIndex + 1) % fileCount;
168263
}
264+
265+
currentFileIndex = fileIndex;
169266
}
170267
catch (UncheckedIOException | IOException e) {
171268
fileSystemErrorHandler.run();
172269
throw new TrinoException(GENERIC_INTERNAL_ERROR, "Failed to spill pages", e);
173270
}
174-
return DataSize.ofBytes(spilledPagesBytes);
175-
}
176271

177-
private Iterator<Page> readPages()
178-
{
179-
checkState(writable.getAndSet(false), "Repeated reads are disallowed to prevent potential resource leaks");
180-
181-
try {
182-
Optional<SecretKey> encryptionKey = this.encryptionKey;
183-
checkState(encrypted == encryptionKey.isPresent(), "encryptionKey has been discarded");
184-
PageDeserializer deserializer = serdeFactory.createDeserializer(encryptionKey);
185-
// encryption key is safe to discard since it now belongs to the PageDeserializer and repeated reads are disallowed
186-
this.encryptionKey = Optional.empty();
187-
InputStream input = closer.register(targetFile.newInputStream());
188-
Iterator<Page> pages = PagesSerdeUtil.readPages(deserializer, input);
189-
return closeWhenExhausted(pages, input);
190-
}
191-
catch (IOException e) {
192-
fileSystemErrorHandler.run();
193-
throw new TrinoException(GENERIC_INTERNAL_ERROR, "Failed to read spilled pages", e);
194-
}
272+
return DataSize.ofBytes(spilledPagesBytes);
195273
}
196274

197275
@Override
@@ -215,6 +293,23 @@ private void checkNoSpillInProgress()
215293
checkState(spillInProgress.isDone(), "spill in progress");
216294
}
217295

296+
private SliceOutput newSliceOutput(int fileIndex)
297+
throws IOException
298+
{
299+
return new OutputStreamSliceOutput(targetFiles.get(fileIndex).newOutputStream(APPEND), BUFFER_SIZE);
300+
}
301+
302+
/**
303+
* Returns an iterator that exposes all pages stored in the given file.
304+
* Pages are lazily deserialized as the iterator is consumed.
305+
*/
306+
private Iterator<Page> readFilePages(PageDeserializer deserializer, FileHolder file, Closer closer)
307+
throws IOException
308+
{
309+
InputStream input = closer.register(file.newInputStream());
310+
return transform(closeWhenExhausted(PagesSerdeUtil.readSerializedPages(input), input), deserializer::deserialize);
311+
}
312+
218313
private static <T> Iterator<T> closeWhenExhausted(Iterator<T> iterator, Closeable resource)
219314
{
220315
requireNonNull(iterator, "iterator is null");

core/trino-main/src/main/java/io/trino/spiller/FileSingleStreamSpillerFactory.java

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import io.airlift.log.Logger;
2323
import io.trino.FeaturesConfig;
2424
import io.trino.cache.NonKeyEvictableLoadingCache;
25+
import io.trino.execution.TaskManagerConfig;
2526
import io.trino.execution.buffer.CompressionCodec;
2627
import io.trino.execution.buffer.PagesSerdeFactory;
2728
import io.trino.memory.context.LocalMemoryContext;
@@ -80,11 +81,12 @@ public class FileSingleStreamSpillerFactory
8081
private final SpillerStats spillerStats;
8182
private final double maxUsedSpaceThreshold;
8283
private final boolean spillEncryptionEnabled;
84+
private final int spillFileCount;
8385
private int roundRobinIndex;
8486
private final NonKeyEvictableLoadingCache<Path, Boolean> spillPathHealthCache;
8587

8688
@Inject
87-
public FileSingleStreamSpillerFactory(BlockEncodingSerde blockEncodingSerde, SpillerStats spillerStats, FeaturesConfig featuresConfig, NodeSpillConfig nodeSpillConfig)
89+
public FileSingleStreamSpillerFactory(BlockEncodingSerde blockEncodingSerde, SpillerStats spillerStats, FeaturesConfig featuresConfig, NodeSpillConfig nodeSpillConfig, TaskManagerConfig taskManagerConfig)
8890
{
8991
this(
9092
listeningDecorator(newFixedThreadPool(
@@ -93,6 +95,7 @@ public FileSingleStreamSpillerFactory(BlockEncodingSerde blockEncodingSerde, Spi
9395
requireNonNull(blockEncodingSerde, "blockEncodingSerde is null"),
9496
spillerStats,
9597
featuresConfig.getSpillerSpillPaths(),
98+
Math.min(featuresConfig.getSpillerThreads(), taskManagerConfig.getTaskConcurrency()),
9699
featuresConfig.getSpillMaxUsedSpaceThreshold(),
97100
nodeSpillConfig.getSpillCompressionCodec(),
98101
nodeSpillConfig.isSpillEncryptionEnabled());
@@ -104,6 +107,7 @@ public FileSingleStreamSpillerFactory(
104107
BlockEncodingSerde blockEncodingSerde,
105108
SpillerStats spillerStats,
106109
List<Path> spillPaths,
110+
int spillFileCount,
107111
double maxUsedSpaceThreshold,
108112
CompressionCodec compressionCodec,
109113
boolean spillEncryptionEnabled)
@@ -124,6 +128,7 @@ public FileSingleStreamSpillerFactory(
124128
throw new IllegalArgumentException(format("spill path %s is not accessible, it must be +rwx; adjust %s config property or filesystem permissions", path, SPILLER_SPILL_PATH));
125129
}
126130
});
131+
this.spillFileCount = spillFileCount;
127132
this.maxUsedSpaceThreshold = maxUsedSpaceThreshold;
128133
this.spillEncryptionEnabled = spillEncryptionEnabled;
129134
this.roundRobinIndex = 0;
@@ -165,14 +170,19 @@ private static void cleanupOldSpillFiles(Path path)
165170
}
166171

167172
@Override
168-
public SingleStreamSpiller create(List<Type> types, SpillContext spillContext, LocalMemoryContext memoryContext)
173+
public SingleStreamSpiller create(List<Type> types, SpillContext spillContext, LocalMemoryContext memoryContext, boolean parallelSpill)
169174
{
170175
Optional<SecretKey> encryptionKey = spillEncryptionEnabled ? Optional.of(createRandomAesEncryptionKey()) : Optional.empty();
176+
ImmutableList.Builder<Path> paths = ImmutableList.builder();
177+
int spillFileCount = parallelSpill ? this.spillFileCount : 1;
178+
for (int i = 0; i < spillFileCount; i++) {
179+
paths.add(getNextSpillPath());
180+
}
171181
return new FileSingleStreamSpiller(
172182
serdeFactory,
173183
encryptionKey,
174184
executor,
175-
getNextSpillPath(),
185+
paths.build(),
176186
spillerStats,
177187
spillContext,
178188
memoryContext,

core/trino-main/src/main/java/io/trino/spiller/SingleStreamSpillerFactory.java

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,16 @@
2121

2222
public interface SingleStreamSpillerFactory
2323
{
24-
SingleStreamSpiller create(List<Type> types, SpillContext spillContext, LocalMemoryContext memoryContext);
24+
default SingleStreamSpiller create(List<Type> types, SpillContext spillContext, LocalMemoryContext memoryContext)
25+
{
26+
return create(types, spillContext, memoryContext, false);
27+
}
28+
29+
SingleStreamSpiller create(List<Type> types, SpillContext spillContext, LocalMemoryContext memoryContext, boolean parallelSpill);
2530

2631
static SingleStreamSpillerFactory unsupportedSingleStreamSpillerFactory()
2732
{
28-
return (types, spillContext, memoryContext) -> {
33+
return (types, spillContext, memoryContext, parallelSpill) -> {
2934
throw new UnsupportedOperationException();
3035
};
3136
}

core/trino-main/src/test/java/io/trino/execution/TaskTestUtils.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -177,12 +177,12 @@ public static LocalExecutionPlanner createTestingPlanner()
177177
new JoinFilterFunctionCompiler(PLANNER_CONTEXT.getFunctionManager()),
178178
new IndexJoinLookupStats(),
179179
new TaskManagerConfig(),
180-
new GenericSpillerFactory((types, spillContext, memoryContext) -> {
180+
new GenericSpillerFactory((types, spillContext, memoryContext, parallelSpill) -> {
181181
throw new UnsupportedOperationException();
182182
}),
183183
new QueryDataEncoders(new SpoolingEnabledConfig(), Set.of()),
184184
Optional.empty(),
185-
(types, spillContext, memoryContext) -> {
185+
(types, spillContext, memoryContext, parallelSpill) -> {
186186
throw new UnsupportedOperationException();
187187
},
188188
(types, partitionFunction, spillContext, memoryContext) -> {

core/trino-main/src/test/java/io/trino/operator/join/JoinTestUtils.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -317,7 +317,7 @@ public DummySpillerFactory failUnspill()
317317
}
318318

319319
@Override
320-
public SingleStreamSpiller create(List<Type> types, SpillContext spillContext, LocalMemoryContext memoryContext)
320+
public SingleStreamSpiller create(List<Type> types, SpillContext spillContext, LocalMemoryContext memoryContext, boolean parallelSpill)
321321
{
322322
return new SingleStreamSpiller()
323323
{

core/trino-main/src/test/java/io/trino/operator/spiller/BenchmarkBinaryFileSpiller.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,7 @@ public void setup()
117117
BLOCK_ENCODING_SERDE,
118118
spillerStats,
119119
ImmutableList.of(SPILL_PATH),
120+
1,
120121
1.0,
121122
compressionCodec,
122123
encryptionEnabled);

core/trino-main/src/test/java/io/trino/spiller/TestBinaryFileSpiller.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
import io.airlift.slice.Slices;
1818
import io.trino.FeaturesConfig;
1919
import io.trino.RowPagesBuilder;
20+
import io.trino.execution.TaskManagerConfig;
2021
import io.trino.execution.buffer.PageSerializer;
2122
import io.trino.execution.buffer.PagesSerdeFactory;
2223
import io.trino.memory.context.AggregatedMemoryContext;
@@ -82,7 +83,7 @@ public void setUp()
8283
featuresConfig.setSpillMaxUsedSpaceThreshold(1.0);
8384
NodeSpillConfig nodeSpillConfig = new NodeSpillConfig();
8485
BlockEncodingSerde blockEncodingSerde = new TestingBlockEncodingSerde();
85-
singleStreamSpillerFactory = new FileSingleStreamSpillerFactory(blockEncodingSerde, spillerStats, featuresConfig, nodeSpillConfig);
86+
singleStreamSpillerFactory = new FileSingleStreamSpillerFactory(blockEncodingSerde, spillerStats, featuresConfig, nodeSpillConfig, new TaskManagerConfig());
8687
factory = new GenericSpillerFactory(singleStreamSpillerFactory);
8788
PagesSerdeFactory pagesSerdeFactory = createSpillingPagesSerdeFactory(blockEncodingSerde, nodeSpillConfig.getSpillCompressionCodec());
8889
serializer = pagesSerdeFactory.createSerializer(Optional.empty());

0 commit comments

Comments
 (0)