114 | 114 | import static io.trino.plugin.iceberg.IcebergFileFormat.ORC; |
115 | 115 | import static io.trino.plugin.iceberg.IcebergFileFormat.PARQUET; |
116 | 116 | import static io.trino.plugin.iceberg.IcebergQueryRunner.ICEBERG_CATALOG; |
| 117 | +import static io.trino.plugin.iceberg.IcebergSessionProperties.COLLECT_EXTENDED_STATISTICS_ON_WRITE; |
117 | 118 | import static io.trino.plugin.iceberg.IcebergSessionProperties.EXTENDED_STATISTICS_ENABLED; |
118 | 119 | import static io.trino.plugin.iceberg.IcebergSplitManager.ICEBERG_DOMAIN_COMPACTION_THRESHOLD; |
119 | 120 | import static io.trino.plugin.iceberg.IcebergTestUtils.withSmallRowGroups; |
@@ -4576,28 +4577,38 @@ public void testStatsBasedRepartitionDataOnInsert() |
4576 | 4577 |
4577 | 4578 | private void testStatsBasedRepartitionData(boolean ctas) |
4578 | 4579 | { |
4579 | | - Session sessionRepartitionMany = Session.builder(getSession()) |
4580 | | - .setSystemProperty(SCALE_WRITERS, "false") |
4581 | | - .setSystemProperty(USE_PREFERRED_WRITE_PARTITIONING, "false") |
4582 | | - .build(); |
4583 | | - // Use DISTINCT to add data redistribution between source table and the writer. This makes it more likely that all writers get some data. |
4584 | | - String sourceRelation = "(SELECT DISTINCT orderkey, custkey, orderstatus FROM tpch.tiny.orders)"; |
4585 | | - testRepartitionData( |
4586 | | - getSession(), |
4587 | | - sourceRelation, |
4588 | | - ctas, |
4589 | | - "'orderstatus'", |
4590 | | - 3); |
4591 | | - // Test uses relatively small table (15K rows). When engine doesn't redistribute data for writes, |
4592 | | - // occasionally a worker node doesn't get any data and fewer files get created. |
4593 | | - assertEventually(new Duration(3, MINUTES), () -> { |
| 4580 | + String catalog = getSession().getCatalog().orElseThrow(); |
| 4581 | + try (TestTable sourceTable = new TestTable( |
| 4582 | + sql -> assertQuerySucceeds( |
| 4583 | + Session.builder(getSession()) |
| 4584 | + .setCatalogSessionProperty(catalog, COLLECT_EXTENDED_STATISTICS_ON_WRITE, "true") |
| 4585 | + .build(), |
| 4586 | + sql), |
| 4587 | + "temp_table_analyzed", |
| 4588 | + "AS SELECT orderkey, custkey, orderstatus FROM tpch.\"sf0.03\".orders")) { |
| 4589 | + Session sessionRepartitionMany = Session.builder(getSession()) |
| 4590 | + .setSystemProperty(SCALE_WRITERS, "false") |
| 4591 | + .setSystemProperty(USE_PREFERRED_WRITE_PARTITIONING, "false") |
| 4592 | + .build(); |
| 4593 | + // Use DISTINCT to add data redistribution between source table and the writer. This makes it more likely that all writers get some data. |
| 4594 | + String sourceRelation = "(SELECT DISTINCT orderkey, custkey, orderstatus FROM " + sourceTable.getName() + ")"; |
4594 | 4595 | testRepartitionData( |
4595 | | - sessionRepartitionMany, |
| 4596 | + getSession(), |
4596 | 4597 | sourceRelation, |
4597 | 4598 | ctas, |
4598 | 4599 | "'orderstatus'", |
4599 | | - 9); |
4600 | | - }); |
| 4600 | + 3); |
| 4601 | + // Test uses relatively small table (45K rows). When engine doesn't redistribute data for writes, |
| 4602 | + // occasionally a worker node doesn't get any data and fewer files get created. |
| 4603 | + assertEventually(new Duration(3, MINUTES), () -> { |
| 4604 | + testRepartitionData( |
| 4605 | + sessionRepartitionMany, |
| 4606 | + sourceRelation, |
| 4607 | + ctas, |
| 4608 | + "'orderstatus'", |
| 4609 | + 9); |
| 4610 | + }); |
| 4611 | + } |
4601 | 4612 | } |
4602 | 4613 |
4603 | 4614 | private void testRepartitionData(Session session, String sourceRelation, boolean ctas, String partitioning, int expectedFiles) |
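For readers unfamiliar with the test helper, here is a minimal sketch of the try-with-resources pattern the updated test relies on, assuming the standard io.trino.testing.sql.TestTable and SqlExecutor helpers from Trino's test infrastructure; the table prefix, source table, and row count below are illustrative only and are not taken from the change above.

    // Sketch: TestTable creates the table on construction (prefix + random suffix)
    // and drops it automatically when close() runs at the end of the try block.
    SqlExecutor executor = sql -> getQueryRunner().execute(sql);
    try (TestTable table = new TestTable(executor, "temp_orders", "AS SELECT * FROM tpch.tiny.orders")) {
        // getName() returns the generated table name, e.g. temp_orders_<suffix>
        assertQuery("SELECT count(*) FROM " + table.getName(), "VALUES 15000");
    }

In the diff, the SqlExecutor lambda additionally wraps the session with COLLECT_EXTENDED_STATISTICS_ON_WRITE enabled, so the CREATE TABLE AS that populates the source table also collects extended statistics, which the stats-based repartitioning under test depends on.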