-
Notifications
You must be signed in to change notification settings - Fork 3.6k
Refactor and improve QueryInfo and StageInfo #11580
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
407348a
630e007
7afcda0
8716d83
8365d7d
f11f5f5
ac875dd
da3aad6
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -17,6 +17,7 @@ | |
| import com.google.common.collect.ImmutableMap; | ||
| import com.google.common.collect.ImmutableSet; | ||
| import com.google.common.collect.Lists; | ||
| import com.google.common.collect.Sets; | ||
| import com.google.common.util.concurrent.Futures; | ||
| import com.google.common.util.concurrent.ListenableFuture; | ||
| import io.airlift.log.Logger; | ||
|
|
@@ -733,11 +734,14 @@ private static StatementStats toStatementStats(QueryInfo queryInfo) | |
| QueryStats queryStats = queryInfo.getQueryStats(); | ||
| StageInfo outputStage = queryInfo.getOutputStage().orElse(null); | ||
|
|
||
| Set<String> globalUniqueNodes = new HashSet<>(); | ||
| StageStats rootStageStats = toStageStats(outputStage, globalUniqueNodes); | ||
|
|
||
| return StatementStats.builder() | ||
| .setState(queryInfo.getState().toString()) | ||
| .setQueued(queryInfo.getState() == QueryState.QUEUED) | ||
| .setScheduled(queryInfo.isScheduled()) | ||
| .setNodes(globalUniqueNodes(outputStage).size()) | ||
| .setNodes(globalUniqueNodes.size()) | ||
| .setTotalSplits(queryStats.getTotalDrivers()) | ||
| .setQueuedSplits(queryStats.getQueuedDrivers()) | ||
| .setRunningSplits(queryStats.getRunningDrivers() + queryStats.getBlockedDrivers()) | ||
|
|
@@ -751,35 +755,23 @@ private static StatementStats toStatementStats(QueryInfo queryInfo) | |
| .setPhysicalInputBytes(queryStats.getPhysicalInputDataSize().toBytes()) | ||
| .setPeakMemoryBytes(queryStats.getPeakUserMemoryReservation().toBytes()) | ||
| .setSpilledBytes(queryStats.getSpilledDataSize().toBytes()) | ||
| .setRootStage(toStageStats(outputStage)) | ||
| .setRootStage(rootStageStats) | ||
| .build(); | ||
| } | ||
|
|
||
| private static StageStats toStageStats(StageInfo stageInfo) | ||
| private static StageStats toStageStats(StageInfo stageInfo, Set<String> globalUniqueNodes) | ||
|
pettyjamesm marked this conversation as resolved.
Outdated
|
||
| { | ||
| if (stageInfo == null) { | ||
| return null; | ||
| } | ||
|
|
||
| io.trino.execution.StageStats stageStats = stageInfo.getStageStats(); | ||
|
|
||
| ImmutableList.Builder<StageStats> subStages = ImmutableList.builder(); | ||
| for (StageInfo subStage : stageInfo.getSubStages()) { | ||
| subStages.add(toStageStats(subStage)); | ||
| } | ||
|
|
||
| Set<String> uniqueNodes = new HashSet<>(); | ||
| for (TaskInfo task : stageInfo.getTasks()) { | ||
| // todo add nodeId to TaskInfo | ||
| URI uri = task.getTaskStatus().getSelf(); | ||
| uniqueNodes.add(uri.getHost() + ":" + uri.getPort()); | ||
| } | ||
|
|
||
| return StageStats.builder() | ||
| // Store current stage details into a builder | ||
| StageStats.Builder builder = StageStats.builder() | ||
| .setStageId(String.valueOf(stageInfo.getStageId().getId())) | ||
| .setState(stageInfo.getState().toString()) | ||
| .setDone(stageInfo.getState().isDone()) | ||
| .setNodes(uniqueNodes.size()) | ||
| .setTotalSplits(stageStats.getTotalDrivers()) | ||
| .setQueuedSplits(stageStats.getQueuedDrivers()) | ||
| .setRunningSplits(stageStats.getRunningDrivers() + stageStats.getBlockedDrivers()) | ||
|
|
@@ -791,26 +783,34 @@ private static StageStats toStageStats(StageInfo stageInfo) | |
| .setPhysicalInputBytes(stageStats.getPhysicalInputDataSize().toBytes()) | ||
| .setFailedTasks(stageStats.getFailedTasks()) | ||
| .setCoordinatorOnly(stageInfo.isCoordinatorOnly()) | ||
| .setSubStages(subStages.build()) | ||
| .build(); | ||
| } | ||
| .setNodes(countStageAndAddGlobalUniqueNodes(stageInfo, globalUniqueNodes)); | ||
|
|
||
| private static Set<String> globalUniqueNodes(StageInfo stageInfo) | ||
| { | ||
| if (stageInfo == null) { | ||
| return ImmutableSet.of(); | ||
| // Recurse into child stages to create their StageStats | ||
| List<StageInfo> subStages = stageInfo.getSubStages(); | ||
| if (subStages.isEmpty()) { | ||
| builder.setSubStages(ImmutableList.of()); | ||
| } | ||
| ImmutableSet.Builder<String> nodes = ImmutableSet.builder(); | ||
| for (TaskInfo task : stageInfo.getTasks()) { | ||
| // todo add nodeId to TaskInfo | ||
| URI uri = task.getTaskStatus().getSelf(); | ||
| nodes.add(uri.getHost() + ":" + uri.getPort()); | ||
| else { | ||
| ImmutableList.Builder<StageStats> subStagesBuilder = ImmutableList.builderWithExpectedSize(subStages.size()); | ||
| for (StageInfo subStage : subStages) { | ||
| subStagesBuilder.add(toStageStats(subStage, globalUniqueNodes)); | ||
| } | ||
| builder.setSubStages(subStagesBuilder.build()); | ||
| } | ||
|
|
||
| for (StageInfo subStage : stageInfo.getSubStages()) { | ||
| nodes.addAll(globalUniqueNodes(subStage)); | ||
| return builder.build(); | ||
| } | ||
|
|
||
| private static int countStageAndAddGlobalUniqueNodes(StageInfo stageInfo, Set<String> globalUniqueNodes) | ||
| { | ||
| List<TaskInfo> tasks = stageInfo.getTasks(); | ||
| Set<String> stageUniqueNodes = Sets.newHashSetWithExpectedSize(tasks.size()); | ||
| for (TaskInfo task : tasks) { | ||
| String nodeId = task.getTaskStatus().getNodeId(); | ||
| stageUniqueNodes.add(nodeId); | ||
| globalUniqueNodes.add(nodeId); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would it be better if we did
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There’s no particular advantage to bulk inserts on HashSet, the implementation just does another loop with another iterator which is probably slightly slower (extra iterator, iterator is over the Set instead of a List, temporal memory locality of access, etc) |
||
| } | ||
| return nodes.build(); | ||
| return stageUniqueNodes.size(); | ||
| } | ||
|
|
||
| private static Optional<Integer> findCancelableLeafStage(QueryInfo queryInfo) | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I wonder if
scheduledshould be kept in QueryInfo and removed from QueryStats as it seems to be about a query's state rather than statistics.Will let @sopel39 decide this
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, I wasn’t totally sure and it’s easy enough to keep the field in both places (just redundant)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@sopel39 - think you'll have a chance to weigh in on this soon?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
makes sense.
BTW: do we need
scheduledsince we havestatein query info?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@raunaqmorarka - I took a look at moving
scheduledfromQueryStatstoQueryInfobut unfortunately it's used from withinQueryStats#getProgressPercentagewhich makes extracting it a harder problem, so I propose we leave it inQueryStatssince allQueryInfoinstances have aQueryStatsvalue they can consult.@sopel39 : We don't "need" it in the sense that you could re-compute it, but the
QueryInfolevel notion of beingscheduledis not a function of theQueryStatebut rather is set to true when of the query's stages are either done, pending, or running- so it does have semantic meaning on it's own.