@@ -22,11 +22,13 @@
 import java.nio.ByteBuffer;
 import org.apache.arrow.memory.ArrowBuf;
 import org.apache.arrow.vector.FixedSizeBinaryVector;
+import org.apache.arrow.vector.IntVector;
 import org.apache.arrow.vector.ValueVector;
 import org.apache.arrow.vector.VarCharVector;
 import org.apache.arrow.vector.complex.ListVector;
 import org.apache.iceberg.arrow.vectorized.GenericArrowVectorAccessorFactory;
 import org.apache.iceberg.util.UUIDUtil;
+import org.apache.parquet.column.Dictionary;
 import org.apache.spark.sql.types.Decimal;
 import org.apache.spark.sql.vectorized.ArrowColumnVector;
 import org.apache.spark.sql.vectorized.ColumnarArray;
@@ -81,6 +83,12 @@ public UTF8String ofRow(FixedSizeBinaryVector vector, int rowId) {
     return UTF8String.fromString(UUIDUtil.convert(vector.get(rowId)).toString());
   }
 
+  @Override
+  public UTF8String ofRow(IntVector offsetVector, Dictionary dictionary, int rowId) {
+    byte[] bytes = dictionary.decodeToBinary(offsetVector.get(rowId)).getBytes();
+    return UTF8String.fromString(UUIDUtil.convert(bytes).toString());
+  }
+
   @Override
   public UTF8String ofBytes(byte[] bytes) {
     return UTF8String.fromBytes(bytes);
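A note on the conversion path above: the dictionary lookup returns the UUID's raw 16 bytes, and UUIDUtil.convert turns those into a java.util.UUID before formatting. Below is a minimal standalone sketch of that byte-to-UUID step, assuming the Parquet UUID logical type's big-endian layout (most-significant 8 bytes first); uuidFromBytes is a hypothetical name for illustration, not the Iceberg API.

    import java.nio.ByteBuffer;
    import java.util.UUID;

    public class UuidBytes {
      // Sketch of converting the 16-byte value a dictionary lookup returns into a
      // java.util.UUID, assuming the big-endian layout of the Parquet UUID logical
      // type (most-significant 8 bytes first). UUIDUtil.convert plays this role
      // in the accessor above.
      static UUID uuidFromBytes(byte[] bytes) {
        ByteBuffer buf = ByteBuffer.wrap(bytes); // ByteBuffer defaults to big-endian
        return new UUID(buf.getLong(), buf.getLong()); // msb read first, then lsb
      }

      public static void main(String[] args) {
        byte[] bytes = new byte[16];
        bytes[15] = 1; // set only the least-significant byte
        System.out.println(uuidFromBytes(bytes)); // 00000000-0000-0000-0000-000000000001
      }
    }

Formatting through UTF8String.fromString keeps the dictionary path consistent with the non-dictionary FixedSizeBinaryVector accessor above, so both produce the same string form of the UUID.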
@@ -329,4 +329,19 @@ public void testUnsupportedReadsForParquetV2() throws Exception {
         .hasMessageStartingWith("Cannot support vectorized reads for column")
         .hasMessageEndingWith("Disable vectorized reads to read this table/file");
   }
+
+  @Test
+  public void testUuidReads() throws Exception {
+    // Just one row to maintain dictionary encoding
+    int numRows = 1;
+    Schema schema = new Schema(optional(100, "uuid", Types.UUIDType.get()));
+
+    File dataFile = File.createTempFile("junit", null, temp.toFile());
+    assertThat(dataFile.delete()).as("Delete should succeed").isTrue();
+    Iterable<Record> data = generateData(schema, numRows, 0L, 0, IDENTITY);
+    try (FileAppender<Record> writer = getParquetV2Writer(schema, dataFile)) {
+      writer.addAll(data);
+    }
+    assertRecordsMatch(schema, numRows, data, dataFile, false, BATCH_SIZE);
+  }
 }
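The "just one row" comment encodes an assumption: Parquet writers fall back from dictionary to plain encoding once a column's dictionary grows too large, so a single row keeps the column dictionary-encoded and guarantees the new dictionary-decode accessor is actually exercised. If that guarantee ever needs to be asserted rather than assumed, a sketch along these lines could check the file footer directly; isDictionaryEncoded is a hypothetical helper built on the standard parquet-hadoop footer API.

    import java.io.File;
    import java.io.IOException;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.parquet.column.Encoding;
    import org.apache.parquet.hadoop.ParquetFileReader;
    import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData;
    import org.apache.parquet.hadoop.util.HadoopInputFile;

    public class EncodingCheck {
      // Hypothetical helper: reads the footer of a written file and reports whether
      // the first column chunk of the first row group carries a dictionary encoding
      // (RLE_DICTIONARY for Parquet V2 pages, PLAIN_DICTIONARY for V1).
      static boolean isDictionaryEncoded(File file) throws IOException {
        try (ParquetFileReader reader = ParquetFileReader.open(
            HadoopInputFile.fromPath(new Path(file.toURI()), new Configuration()))) {
          ColumnChunkMetaData column =
              reader.getFooter().getBlocks().get(0).getColumns().get(0);
          return column.getEncodings().contains(Encoding.RLE_DICTIONARY)
              || column.getEncodings().contains(Encoding.PLAIN_DICTIONARY);
        }
      }
    }

An assertThat(isDictionaryEncoded(dataFile)).isTrue() after the writer closes would then pin the test to the dictionary-encoded path rather than relying on the row count alone.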