diff --git a/presto-main/src/main/java/com/facebook/presto/cost/HistoricalPlanStatisticsUtil.java b/presto-main/src/main/java/com/facebook/presto/cost/HistoricalPlanStatisticsUtil.java index dbad0928beaa6..7570dffa9a303 100644 --- a/presto-main/src/main/java/com/facebook/presto/cost/HistoricalPlanStatisticsUtil.java +++ b/presto-main/src/main/java/com/facebook/presto/cost/HistoricalPlanStatisticsUtil.java @@ -40,7 +40,10 @@ public static PlanStatistics getPredictedPlanStatistics( if (lastRunsStatistics.isEmpty()) { return PlanStatistics.empty(); } - + if (inputTableStatistics.stream().anyMatch(stat -> stat.getRowCount().isUnknown())) { + // If the row count of any input table is unknown, skip similarity matching and return the stats of the most recent run + return lastRunsStatistics.get(lastRunsStatistics.size() - 1).getPlanStatistics(); + } Optional similarStatsIndex = getSimilarStatsIndex(historicalPlanStatistics, inputTableStatistics, historyMatchingThreshold); if (similarStatsIndex.isPresent()) { diff --git a/presto-main/src/main/java/com/facebook/presto/cost/PlanNodeStatsEstimate.java b/presto-main/src/main/java/com/facebook/presto/cost/PlanNodeStatsEstimate.java index 8293de807d585..158e4f01ee57e 100644 --- a/presto-main/src/main/java/com/facebook/presto/cost/PlanNodeStatsEstimate.java +++ b/presto-main/src/main/java/com/facebook/presto/cost/PlanNodeStatsEstimate.java @@ -160,6 +160,10 @@ public double getOutputSizeInBytes(PlanNode planNode) if (!sourceInfo.estimateSizeUsingVariables() && !isNaN(totalSize)) { return totalSize; } + if (!isConfident()) { + // If the estimate is not confident (non-HBO stats and no row count info available), do not compute the output size from the output variables; return NaN instead + return NaN; + } return getOutputSizeForVariables(planNode.getOutputVariables()); } diff --git a/presto-main/src/main/java/com/facebook/presto/cost/SimpleStatsRule.java b/presto-main/src/main/java/com/facebook/presto/cost/SimpleStatsRule.java index a7b8fa6a3559d..783236497d22b 100644 --- 
a/presto-main/src/main/java/com/facebook/presto/cost/SimpleStatsRule.java +++ b/presto-main/src/main/java/com/facebook/presto/cost/SimpleStatsRule.java @@ -36,8 +36,23 @@ protected SimpleStatsRule(StatsNormalizer normalizer) @Override public final Optional calculate(T node, StatsProvider sourceStats, Lookup lookup, Session session, TypeProvider types) { - return doCalculate(node, sourceStats, lookup, session, types) + Optional planNodeStatsEstimate = doCalculate(node, sourceStats, lookup, session, types) .map(estimate -> normalizer.normalize(estimate, node.getOutputVariables())); + if (node.getSources().isEmpty()) { + // Nodes without sources (e.g. table scans) skip the source confidence check and keep the estimate as computed + return planNodeStatsEstimate; + } + boolean confident = sourceStats.getStats(node.getSources().get(0)).isConfident(); + for (PlanNode source : node.getSources()) { + confident = sourceStats.getStats(source).isConfident(); + if (!confident) { + break; + } + } + boolean finalConfident = confident; + return planNodeStatsEstimate.map(p -> new PlanNodeStatsEstimate(p.getOutputRowCount(), + p.getTotalSize(), finalConfident, + p.getVariableStatistics(), p.getJoinNodeStatsEstimate(), p.getTableWriterNodeStatsEstimate())); } protected abstract Optional doCalculate(T node, StatsProvider sourceStats, Lookup lookup, Session session, TypeProvider types); diff --git a/presto-main/src/main/java/com/facebook/presto/cost/TableScanStatsRule.java b/presto-main/src/main/java/com/facebook/presto/cost/TableScanStatsRule.java index 2bae731d8c66a..7e11d92454274 100644 --- a/presto-main/src/main/java/com/facebook/presto/cost/TableScanStatsRule.java +++ b/presto-main/src/main/java/com/facebook/presto/cost/TableScanStatsRule.java @@ -59,6 +59,10 @@ protected Optional doCalculate(TableScanNode node, StatsP Constraint constraint = new Constraint<>(node.getCurrentConstraint()); TableStatistics tableStatistics = metadata.getTableStatistics(session, node.getTable(), ImmutableList.copyOf(node.getAssignments().values()), 
constraint); + if (tableStatistics.getRowCount().isUnknown()) { + // The row count is unknown (no HMS statistics available), so we should not be confident; return an unknown estimate + return Optional.of(PlanNodeStatsEstimate.unknown()); + } Map outputVariableStats = new HashMap<>(); for (Map.Entry entry : node.getAssignments().entrySet()) { diff --git a/presto-main/src/test/java/com/facebook/presto/cost/TestCostCalculator.java b/presto-main/src/test/java/com/facebook/presto/cost/TestCostCalculator.java index 68336bc5238d4..fd3df5b03fa3c 100644 --- a/presto-main/src/test/java/com/facebook/presto/cost/TestCostCalculator.java +++ b/presto-main/src/test/java/com/facebook/presto/cost/TestCostCalculator.java @@ -756,6 +756,7 @@ private static PlanNodeStatsEstimate statsEstimate(Collection + p.join( + INNER, + p.values(new PlanNodeId("valuesA"), aRows, p.variable("A1", BIGINT)), + p.values(new PlanNodeId("valuesB"), bRows, p.variable("B1", BIGINT)), + ImmutableList.of(new JoinNode.EquiJoinClause(p.variable("A1", BIGINT), p.variable("B1", BIGINT))), + ImmutableList.of(p.variable("A1", BIGINT), p.variable("B1", BIGINT)), + Optional.empty())) + .matches(join( + INNER, + ImmutableList.of(equiJoinClause("B1", "A1")), + Optional.empty(), + Optional.of(PARTITIONED), + values(ImmutableMap.of("B1", 0)), + values(ImmutableMap.of("A1", 0)))); + } + @Test public void testFlipAndReplicateWhenOneTableMuchSmaller() { @@ -1333,9 +1366,11 @@ public void testGetSourceTablesSizeInBytes() // two source plan nodes PlanNodeStatsEstimate sourceStatsEstimate1 = PlanNodeStatsEstimate.builder() .setOutputRowCount(10) + .setConfident(true) .build(); PlanNodeStatsEstimate sourceStatsEstimate2 = PlanNodeStatsEstimate.builder() .setOutputRowCount(20) + .setConfident(true) .build(); assertEquals( getSourceTablesSizeInBytes( @@ -1405,15 +1440,19 @@ public void testGetApproximateSourceSizeInBytes() // two source plan nodes PlanNodeStatsEstimate sourceStatsEstimate1 = PlanNodeStatsEstimate.builder() .setOutputRowCount(1000) + .setConfident(true) 
.build(); PlanNodeStatsEstimate sourceStatsEstimate2 = PlanNodeStatsEstimate.builder() .setOutputRowCount(2000) + .setConfident(true) .build(); PlanNodeStatsEstimate filterStatsEstimate = PlanNodeStatsEstimate.builder() .setOutputRowCount(250) + .setConfident(true) .build(); PlanNodeStatsEstimate limitStatsEstimate = PlanNodeStatsEstimate.builder() .setOutputRowCount(20) + .setConfident(true) .build(); double sourceRowCount = sourceStatsEstimate1.getOutputRowCount() + sourceStatsEstimate2.getOutputRowCount(); double unionInputRowCount = filterStatsEstimate.getOutputRowCount() + limitStatsEstimate.getOutputRowCount(); diff --git a/presto-main/src/test/java/com/facebook/presto/sql/planner/iterative/rule/test/RuleAssert.java b/presto-main/src/test/java/com/facebook/presto/sql/planner/iterative/rule/test/RuleAssert.java index 1a68954602654..4b220292a197e 100644 --- a/presto-main/src/test/java/com/facebook/presto/sql/planner/iterative/rule/test/RuleAssert.java +++ b/presto-main/src/test/java/com/facebook/presto/sql/planner/iterative/rule/test/RuleAssert.java @@ -107,7 +107,16 @@ public RuleAssert withSession(Session session) public RuleAssert overrideStats(String nodeId, PlanNodeStatsEstimate nodeStats) { - statsCalculator.setNodeStats(new PlanNodeId(nodeId), nodeStats); + // For testing, overridden stats are marked confident unless a confidence value is passed explicitly + return overrideStats(nodeId, nodeStats, true); + } + + public RuleAssert overrideStats(String nodeId, PlanNodeStatsEstimate nodeStats, boolean confidence) + { + PlanNodeStatsEstimate statsWithConfidence = new PlanNodeStatsEstimate(nodeStats.getOutputRowCount(), + nodeStats.getTotalSize(), confidence, + nodeStats.getVariableStatistics(), nodeStats.getJoinNodeStatsEstimate(), nodeStats.getTableWriterNodeStatsEstimate()); + statsCalculator.setNodeStats(new PlanNodeId(nodeId), statsWithConfidence); return this; } diff --git a/presto-main/src/test/java/com/facebook/presto/util/TestGraphvizPrinter.java 
b/presto-main/src/test/java/com/facebook/presto/util/TestGraphvizPrinter.java index 00a3554da6e98..ac0bd5315f2be 100644 --- a/presto-main/src/test/java/com/facebook/presto/util/TestGraphvizPrinter.java +++ b/presto-main/src/test/java/com/facebook/presto/util/TestGraphvizPrinter.java @@ -67,7 +67,7 @@ public class TestGraphvizPrinter TupleDomain.all(), TupleDomain.all()); private static final String TEST_TABLE_SCAN_NODE_INNER_OUTPUT = format( - "label=\"{TableScan | [TableHandle \\{connectorId='%s', connectorHandle='%s', layout='Optional.empty'\\}]|Estimates: \\{rows: ? (0B), cpu: ?, memory: ?, network: ?\\}\n" + + "label=\"{TableScan | [TableHandle \\{connectorId='%s', connectorHandle='%s', layout='Optional.empty'\\}]|Estimates: \\{rows: ? (?), cpu: ?, memory: ?, network: ?\\}\n" + "}\", style=\"rounded, filled\", shape=record, fillcolor=deepskyblue", TEST_CONNECTOR_ID, TEST_CONNECTOR_TABLE_HANDLE); @@ -133,12 +133,12 @@ public void testPrintDistributedFromFragments() String expected = "digraph distributed_plan {\n" + "subgraph cluster_0 {\n" + "label = \"SOURCE\"\n" + - "plannode_1[label=\"{TableScan | [TableHandle \\{connectorId='connector_id', connectorHandle='com.facebook.presto.testing.TestingMetadata$TestingTableHandle@1af56f7', layout='Optional.empty'\\}]|Estimates: \\{rows: ? (0B), cpu: ?, memory: ?, network: ?\\}\n" + + "plannode_1[label=\"{TableScan | [TableHandle \\{connectorId='connector_id', connectorHandle='com.facebook.presto.testing.TestingMetadata$TestingTableHandle@1af56f7', layout='Optional.empty'\\}]|Estimates: \\{rows: ? (?), cpu: ?, memory: ?, network: ?\\}\n" + "}\", style=\"rounded, filled\", shape=record, fillcolor=deepskyblue];\n" + "}\n" + "subgraph cluster_1 {\n" + "label = \"SOURCE\"\n" + - "plannode_1[label=\"{TableScan | [TableHandle \\{connectorId='connector_id', connectorHandle='com.facebook.presto.testing.TestingMetadata$TestingTableHandle@1af56f7', layout='Optional.empty'\\}]|Estimates: \\{rows: ? 
(0B), cpu: ?, memory: ?, network: ?\\}\n" + + "plannode_1[label=\"{TableScan | [TableHandle \\{connectorId='connector_id', connectorHandle='com.facebook.presto.testing.TestingMetadata$TestingTableHandle@1af56f7', layout='Optional.empty'\\}]|Estimates: \\{rows: ? (?), cpu: ?, memory: ?, network: ?\\}\n" + "}\", style=\"rounded, filled\", shape=record, fillcolor=deepskyblue];\n" + "}\n" + "}\n"; @@ -175,11 +175,11 @@ public void testPrintLogicalForJoinNode() String expected = "digraph logical_plan {\n" + "subgraph cluster_0 {\n" + "label = \"SOURCE\"\n" + - "plannode_1[label=\"{CrossJoin[REPLICATED]|Estimates: \\{rows: ? (0B), cpu: ?, memory: ?, network: ?\\}\n" + + "plannode_1[label=\"{CrossJoin[REPLICATED]|Estimates: \\{rows: ? (?), cpu: ?, memory: ?, network: ?\\}\n" + "}\", style=\"rounded, filled\", shape=record, fillcolor=orange];\n" + - "plannode_2[label=\"{TableScan | [TableHandle \\{connectorId='connector_id', connectorHandle='com.facebook.presto.testing.TestingMetadata$TestingTableHandle@1af56f7', layout='Optional.empty'\\}]|Estimates: \\{rows: ? (0B), cpu: ?, memory: ?, network: ?\\}\n" + + "plannode_2[label=\"{TableScan | [TableHandle \\{connectorId='connector_id', connectorHandle='com.facebook.presto.testing.TestingMetadata$TestingTableHandle@1af56f7', layout='Optional.empty'\\}]|Estimates: \\{rows: ? (?), cpu: ?, memory: ?, network: ?\\}\n" + "}\", style=\"rounded, filled\", shape=record, fillcolor=deepskyblue];\n" + - "plannode_3[label=\"{Values|Estimates: \\{rows: ? (0B), cpu: ?, memory: ?, network: ?\\}\n" + + "plannode_3[label=\"{Values|Estimates: \\{rows: ? (?), cpu: ?, memory: ?, network: ?\\}\n" + "}\", style=\"rounded, filled\", shape=record, fillcolor=deepskyblue];\n" + "}\n" + "plannode_1 -> plannode_3 [label = \"Build\"];\n" + //valuesNode should be the Build side