Spark: Add Multi-thread to construct ReadTask #2803
Changes from all commits
@@ -21,9 +21,13 @@
 import java.io.IOException;
 import java.io.Serializable;
+import java.util.Arrays;
 import java.util.List;
 import java.util.Locale;
 import java.util.Map;
+import java.util.concurrent.ForkJoinPool;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -95,6 +99,8 @@ class Reader implements DataSourceReader, SupportsScanColumnarBatch, SupportsPus
   private Filter[] pushedFilters = NO_FILTERS;
   private final boolean localityPreferred;
   private final int batchSize;
+  private final int poolSize;
+  private final ForkJoinPool pool;

   // lazy variables
   private Schema schema = null;
@@ -157,6 +163,9 @@ class Reader implements DataSourceReader, SupportsScanColumnarBatch, SupportsPus
     this.batchSize = options.get(SparkReadOptions.VECTORIZATION_BATCH_SIZE).map(Integer::parseInt).orElseGet(() ->
         PropertyUtil.propertyAsInt(table.properties(),
             TableProperties.PARQUET_BATCH_SIZE, TableProperties.PARQUET_BATCH_SIZE_DEFAULT));
+    this.poolSize = options.getInt(TableProperties.SPARK_BATCH_SCAN_POOL_SIZE,
+        TableProperties.SPARK_BATCH_SCAN_POOL_SIZE_DEFAULT);
+    this.pool = new ForkJoinPool(this.poolSize);
   }

   private Schema lazySchema() {
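For context on what the pool created here buys: a parallel stream whose terminal operation is invoked from inside a ForkJoinPool worker forks its subtasks into that pool rather than the common pool, so the configured pool size caps concurrency. This is widely relied-upon behavior but not part of the documented Stream contract. A minimal standalone sketch (the pool size of 4 and the task count of 16 are illustrative, not values from this PR):

```java
import java.util.concurrent.ForkJoinPool;
import java.util.stream.IntStream;

public class PoolSizeDemo {
  public static void main(String[] args) throws Exception {
    // At most 4 worker threads execute work submitted to this pool.
    ForkJoinPool pool = new ForkJoinPool(4);
    try {
      // Because the parallel stream is started from a task running in `pool`,
      // its subtasks are forked into `pool` instead of ForkJoinPool.commonPool().
      pool.submit(() ->
          IntStream.range(0, 16).parallel()
              .forEach(i -> System.out.printf("element %d on %s%n", i, Thread.currentThread().getName()))
      ).get();
    } finally {
      pool.shutdown();
    }
  }
}
```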
@@ -193,6 +202,7 @@ public StructType readSchema() {
   /**
    * This is called in the Spark Driver when data is to be materialized into {@link ColumnarBatch}
    */
+  @SuppressWarnings("checkstyle:LocalVariableName")
   @Override
   public List<InputPartition<ColumnarBatch>> planBatchInputPartitions() {
     Preconditions.checkState(enableBatchRead(), "Batched reads not enabled");
@@ -205,35 +215,65 @@ public List<InputPartition<ColumnarBatch>> planBatchInputPartitions() {
     // broadcast the table metadata as input partitions will be sent to executors
     Broadcast<Table> tableBroadcast = sparkContext.broadcast(SerializableTable.copyOf(table));

-    List<InputPartition<ColumnarBatch>> readTasks = Lists.newArrayList();
-    for (CombinedScanTask task : tasks()) {
-      readTasks.add(new ReadTask<>(
-          task, tableBroadcast, expectedSchemaString, caseSensitive,
-          localityPreferred, new BatchReaderFactory(batchSize)));
+    int taskSize = tasks().size();
+    InputPartition<ColumnarBatch>[] readTasks = new InputPartition[taskSize];
+    Long startTime = System.currentTimeMillis();
+    try {
+      pool.submit(() -> IntStream.range(0, taskSize).parallel()
+          .mapToObj(taskId -> {
+            LOG.trace("The size of scanTasks is {}, current taskId is {}, current thread id is {}",
+                taskSize, taskId, Thread.currentThread().getName());
+            readTasks[taskId] = new ReadTask<>(
+                tasks().get(taskId), tableBroadcast, expectedSchemaString, caseSensitive,
+                localityPreferred, new BatchReaderFactory(batchSize));
+            return true;
+          }).collect(Collectors.toList())).get();
+    } catch (Exception e) {
+      LOG.error("Fail to construct ReadTask with thread size = {}, and the size of scanTasks is {}",
+          poolSize, taskSize, e);
+      System.exit(-1);
     }
-    LOG.info("Batching input partitions with {} tasks.", readTasks.size());

-    return readTasks;
+    Long endTime = System.currentTimeMillis();
+    LOG.info("Batching input partitions with {} tasks.", readTasks.length);
+    LOG.info("It took {} s to construct {} readTasks with localityPreferred = {}.", (endTime - startTime) / 1000,
+        taskSize, localityPreferred);
+    return Arrays.asList(readTasks.clone());
   }
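One detail worth noting in the new code above: results are written into a plain array from the parallel stream rather than collected into a list. That is safe because each index is written by exactly one stream element, and the writes are visible once the terminal operation returns. A stripped-down sketch of the same pattern (the types and names here are placeholders, not the PR's classes):

```java
import java.util.Arrays;
import java.util.List;
import java.util.stream.IntStream;

class ParallelArrayFill {
  static List<String> buildAll(List<String> inputs) {
    String[] results = new String[inputs.size()];
    // Each taskId is produced exactly once, so no two threads write the same slot,
    // and all writes are visible after the terminal operation completes.
    IntStream.range(0, inputs.size()).parallel()
        .forEach(taskId -> results[taskId] = "built-from-" + inputs.get(taskId));
    return Arrays.asList(results);
  }

  public static void main(String[] args) {
    System.out.println(buildAll(Arrays.asList("a", "b", "c")));
  }
}
```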
   /**
    * This is called in the Spark Driver when data is to be materialized into {@link InternalRow}
    */
+  @SuppressWarnings("checkstyle:LocalVariableName")
   @Override
   public List<InputPartition<InternalRow>> planInputPartitions() {
     String expectedSchemaString = SchemaParser.toJson(lazySchema());

     // broadcast the table metadata as input partitions will be sent to executors
     Broadcast<Table> tableBroadcast = sparkContext.broadcast(SerializableTable.copyOf(table));

-    List<InputPartition<InternalRow>> readTasks = Lists.newArrayList();
-    for (CombinedScanTask task : tasks()) {
-      readTasks.add(new ReadTask<>(
-          task, tableBroadcast, expectedSchemaString, caseSensitive,
-          localityPreferred, InternalRowReaderFactory.INSTANCE));
+    List<CombinedScanTask> scanTasks = tasks();
+    int taskSize = scanTasks.size();
+    InputPartition<InternalRow>[] readTasks = new InputPartition[taskSize];
+    Long startTime = System.currentTimeMillis();
+    try {
+      pool.submit(() -> IntStream.range(0, taskSize).parallel()
+          .mapToObj(taskId -> {
+            LOG.trace("The size of scanTasks is {}, current taskId is {}, current thread name is {}",
+                taskSize, taskId, Thread.currentThread().getName());
+            readTasks[taskId] = new ReadTask<>(
+                scanTasks.get(taskId), tableBroadcast, expectedSchemaString, caseSensitive,
+                localityPreferred, InternalRowReaderFactory.INSTANCE);
+            return true;
+          }).collect(Collectors.toList())).get();
+    } catch (Exception e) {
+      LOG.error("Fail to construct ReadTask with thread size = {}, and the size of scanTasks is {}",
+          poolSize, taskSize, e);
+      System.exit(-1);
Contributor
I would avoid explicitly calling System.exit and instead let the exception bubble up (possibly catching it and then rethrowing it with additional information added or as another exception type). This would make it easier for end users to track down their exceptions, particularly when working in a notebook where there's limited space to display stack traces already. Is there a specific reason you chose to call System.exit here?

Contributor (Author)
Thank you for reviewing this! I wanted to throw the exception out at first, but if I did that, I would also need to change the code of …
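For illustration, a sketch of the alternative the reviewer describes: catch the failure and rethrow it with context as an unchecked exception instead of terminating the JVM. The class name, field values, helper method, and exception messages below are stand-ins, not code from this PR:

```java
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ForkJoinPool;
import java.util.stream.IntStream;

class ReadTaskPlanner {
  // Stand-ins for Reader's poolSize and pool fields.
  private final int poolSize = 8;
  private final ForkJoinPool pool = new ForkJoinPool(poolSize);

  Object[] planTasks(int taskSize) {
    Object[] readTasks = new Object[taskSize];
    try {
      pool.submit(() -> IntStream.range(0, taskSize).parallel()
          .forEach(taskId -> readTasks[taskId] = createReadTask(taskId))).get();
    } catch (InterruptedException e) {
      Thread.currentThread().interrupt();
      throw new RuntimeException("Interrupted while constructing read tasks", e);
    } catch (ExecutionException e) {
      // Surface the real cause to the caller instead of calling System.exit(-1).
      throw new RuntimeException("Failed to construct " + taskSize +
          " read tasks with pool size " + poolSize, e.getCause());
    }
    return readTasks;
  }

  private Object createReadTask(int taskId) {
    return "read-task-" + taskId;  // placeholder for new ReadTask<>(...)
  }
}
```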
     }

-    return readTasks;
+    Long endTime = System.currentTimeMillis();
+    LOG.info("It took {} s to construct {} readTasks with localityPreferred = {}.", (endTime - startTime) / 1000,
+        taskSize, localityPreferred);
+    return Arrays.asList(readTasks.clone());
   }

   @Override
Should the construction of all read tasks be done in a single submit to the thread pool? I don't see any way to slow the parallelism down here so as to not potentially overwhelm the name node. For example, I would have expected that each of the ranges in the int stream was submitted to the pool individually, so that tasks queue up waiting for their turn. Here, it looks like the parallelism is rather unbounded. Totally open to reading this wrong (it is Sunday for me after all!).
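For comparison, a sketch of the shape this comment seems to suggest: submit each scan task's construction to the pool as its own unit of work, so at most pool-size constructions run at once and the rest wait in the pool's queue. As above, the class and helper names are illustrative placeholders, not the PR's code:

```java
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ForkJoinPool;
import java.util.concurrent.ForkJoinTask;

class PerTaskSubmission {
  private final ForkJoinPool pool = new ForkJoinPool(8);  // illustrative pool size

  Object[] planTasks(int taskSize) {
    Object[] readTasks = new Object[taskSize];
    List<ForkJoinTask<?>> pending = new ArrayList<>();
    // One submission per scan task: the pool's parallelism caps how many constructions
    // run concurrently, and the remaining submissions wait in the pool's queue.
    for (int taskId = 0; taskId < taskSize; taskId++) {
      int id = taskId;  // effectively final copy for the lambda
      pending.add(pool.submit(() -> {
        readTasks[id] = createReadTask(id);
      }));
    }
    pending.forEach(ForkJoinTask::join);  // join() rethrows any task failure as an unchecked exception
    return readTasks;
  }

  private Object createReadTask(int taskId) {
    return "read-task-" + taskId;  // placeholder for new ReadTask<>(...)
  }
}
```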