Skip to content

Commit 7ed9b2e

Browse files
committed
Use bigger data set to address testStatsBasedRepartitionData flakiness
Previously the test used `tpch.tiny.nation`. The tiny schema is sf0.01. Now will use sf0.03. Since the test requires source table to have stats, a copy of the table is created. Despite the increased data size locally the test runs faster now. This is because the test internally uses retries to cover flakiness and now it needs fewer retries to succeed (usually no retries).
1 parent baa6d11 commit 7ed9b2e

File tree

1 file changed

+29
-18
lines changed

1 file changed

+29
-18
lines changed

plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/BaseIcebergConnectorTest.java

Lines changed: 29 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,7 @@
114114
import static io.trino.plugin.iceberg.IcebergFileFormat.ORC;
115115
import static io.trino.plugin.iceberg.IcebergFileFormat.PARQUET;
116116
import static io.trino.plugin.iceberg.IcebergQueryRunner.ICEBERG_CATALOG;
117+
import static io.trino.plugin.iceberg.IcebergSessionProperties.COLLECT_EXTENDED_STATISTICS_ON_WRITE;
117118
import static io.trino.plugin.iceberg.IcebergSessionProperties.EXTENDED_STATISTICS_ENABLED;
118119
import static io.trino.plugin.iceberg.IcebergSplitManager.ICEBERG_DOMAIN_COMPACTION_THRESHOLD;
119120
import static io.trino.plugin.iceberg.IcebergTestUtils.withSmallRowGroups;
@@ -4576,28 +4577,38 @@ public void testStatsBasedRepartitionDataOnInsert()
45764577

45774578
private void testStatsBasedRepartitionData(boolean ctas)
45784579
{
4579-
Session sessionRepartitionMany = Session.builder(getSession())
4580-
.setSystemProperty(SCALE_WRITERS, "false")
4581-
.setSystemProperty(USE_PREFERRED_WRITE_PARTITIONING, "false")
4582-
.build();
4583-
// Use DISTINCT to add data redistribution between source table and the writer. This makes it more likely that all writers get some data.
4584-
String sourceRelation = "(SELECT DISTINCT orderkey, custkey, orderstatus FROM tpch.tiny.orders)";
4585-
testRepartitionData(
4586-
getSession(),
4587-
sourceRelation,
4588-
ctas,
4589-
"'orderstatus'",
4590-
3);
4591-
// Test uses relatively small table (15K rows). When engine doesn't redistribute data for writes,
4592-
// occasionally a worker node doesn't get any data and fewer files get created.
4593-
assertEventually(new Duration(3, MINUTES), () -> {
4580+
String catalog = getSession().getCatalog().orElseThrow();
4581+
try (TestTable sourceTable = new TestTable(
4582+
sql -> assertQuerySucceeds(
4583+
Session.builder(getSession())
4584+
.setCatalogSessionProperty(catalog, COLLECT_EXTENDED_STATISTICS_ON_WRITE, "true")
4585+
.build(),
4586+
sql),
4587+
"temp_table_analyzed",
4588+
"AS SELECT orderkey, custkey, orderstatus FROM tpch.\"sf0.03\".orders")) {
4589+
Session sessionRepartitionMany = Session.builder(getSession())
4590+
.setSystemProperty(SCALE_WRITERS, "false")
4591+
.setSystemProperty(USE_PREFERRED_WRITE_PARTITIONING, "false")
4592+
.build();
4593+
// Use DISTINCT to add data redistribution between source table and the writer. This makes it more likely that all writers get some data.
4594+
String sourceRelation = "(SELECT DISTINCT orderkey, custkey, orderstatus FROM " + sourceTable.getName() + ")";
45944595
testRepartitionData(
4595-
sessionRepartitionMany,
4596+
getSession(),
45964597
sourceRelation,
45974598
ctas,
45984599
"'orderstatus'",
4599-
9);
4600-
});
4600+
3);
4601+
// Test uses relatively small table (45K rows). When engine doesn't redistribute data for writes,
4602+
// occasionally a worker node doesn't get any data and fewer files get created.
4603+
assertEventually(new Duration(3, MINUTES), () -> {
4604+
testRepartitionData(
4605+
sessionRepartitionMany,
4606+
sourceRelation,
4607+
ctas,
4608+
"'orderstatus'",
4609+
9);
4610+
});
4611+
}
46014612
}
46024613

46034614
private void testRepartitionData(Session session, String sourceRelation, boolean ctas, String partitioning, int expectedFiles)

0 commit comments

Comments
 (0)