diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bucket/HoodieBucketIndex.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bucket/HoodieBucketIndex.java
index 3b3eee37e5602..d1967a5ec47fb 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bucket/HoodieBucketIndex.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bucket/HoodieBucketIndex.java
@@ -92,7 +92,6 @@ protected HoodieRecord computeNext() {
   public boolean requiresTagging(WriteOperationType operationType) {
     switch (operationType) {
       case INSERT:
-      case INSERT_OVERWRITE:
       case UPSERT:
       case DELETE:
         return true;
diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestInsertTable.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestInsertTable.scala
index b33deebdf722f..eb2e3e03fabc2 100644
--- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestInsertTable.scala
+++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestInsertTable.scala
@@ -1062,6 +1062,49 @@ class TestInsertTable extends HoodieSparkSqlTestBase {
     }
   }
 
+  test("Test Insert Overwrite Bucket Index Table") {
+    withTempDir { tmp =>
+      val tableName = generateTableName
+      // Create a partitioned table
+      spark.sql(
+        s"""
+           |create table $tableName (
+           |  id int,
+           |  dt string,
+           |  name string,
+           |  price double,
+           |  ts long
+           |) using hudi
+           | tblproperties (
+           |  primaryKey = 'id',
+           |  preCombineField = 'ts',
+           |  hoodie.index.type = 'BUCKET')
+           | partitioned by (dt)
+           | location '${tmp.getCanonicalPath}'
+       """.stripMargin)
+
+      spark.sql(
+        s"""
+           | insert into $tableName values
+           | (1, 'a1', 10, 1000, "2021-01-05")
+        """.stripMargin)
+
+      checkAnswer(s"select id, name, price, ts, dt from $tableName")(
+        Seq(1, "a1", 10.0, 1000, "2021-01-05")
+      )
+
+      spark.sql(
+        s"""
+           | insert overwrite $tableName partition(dt = '2021-01-05')
+           | select 1 as id, 'a2' as name, 11 as price, 1000 as ts
+        """.stripMargin)
+
+      checkAnswer(s"select id, name, price, ts, dt from $tableName")(
+        Seq(1, "a2", 11.0, 1000, "2021-01-05")
+      )
+    }
+  }
+
   /**
    * This test is to make sure that bulk insert doesn't create a bunch of tiny files if
    * hoodie.bulkinsert.user.defined.partitioner.sort.columns doesn't start with the partition columns
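
For reviewers, a minimal standalone sketch of the behavior the first hunk produces: with the bucket index, INSERT_OVERWRITE no longer requires tagging, while INSERT, UPSERT, and DELETE still do. The enum and the `default` branch below are stand-ins and assumptions for illustration only (the diff context stops at `return true;`); they are not the actual Hudi sources.

```java
// Sketch only: stand-in for the post-patch requiresTagging() behavior in
// HoodieBucketIndex; not the real Hudi class.
public class BucketIndexTaggingSketch {

  // Stand-in for org.apache.hudi.common.model.WriteOperationType; only the
  // values relevant to this hunk are listed.
  enum WriteOperationType {
    INSERT, INSERT_OVERWRITE, UPSERT, DELETE
  }

  // Mirrors the switch after the patch: INSERT_OVERWRITE is no longer listed
  // among the tagging cases. The default branch is an assumption, since the
  // diff context ends at "return true;".
  static boolean requiresTagging(WriteOperationType operationType) {
    switch (operationType) {
      case INSERT:
      case UPSERT:
      case DELETE:
        return true;
      default:
        return false;
    }
  }

  public static void main(String[] args) {
    // After the patch, insert overwrite on a bucket-index table skips tagging.
    System.out.println(requiresTagging(WriteOperationType.INSERT_OVERWRITE)); // false
    System.out.println(requiresTagging(WriteOperationType.UPSERT));           // true
  }
}
```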