|
36 | 36 | import java.nio.ByteBuffer; |
37 | 37 | import java.nio.file.Path; |
38 | 38 | import java.util.Collections; |
| 39 | +import java.util.Iterator; |
39 | 40 | import java.util.List; |
40 | 41 | import java.util.function.Function; |
41 | 42 | import java.util.stream.Stream; |
@@ -314,6 +315,176 @@ public void testFooterMetricsWithNameMappingForFileWithoutIds() throws IOExcepti |
314 | 315 | } |
315 | 316 | } |
316 | 317 |
|
| 318 | + @Test |
| 319 | + public void testVectorizedReaderFactoryConfiguration() throws IOException { |
| 320 | + Schema schema = new Schema(optional(1, "intCol", IntegerType.get())); |
| 321 | + File file = createTempFile(temp); |
| 322 | + |
| 323 | + // Write test data |
| 324 | + List<GenericData.Record> records = Lists.newArrayList(); |
| 325 | + org.apache.avro.Schema avroSchema = AvroSchemaUtil.convert(schema.asStruct()); |
| 326 | + GenericData.Record record = new GenericData.Record(avroSchema); |
| 327 | + record.put("intCol", 42); |
| 328 | + records.add(record); |
| 329 | + |
| 330 | + write(file, schema, Collections.emptyMap(), null, records.toArray(new GenericData.Record[] {})); |
| 331 | + |
| 332 | + // Reset the flag |
| 333 | + TestMockVectorizedReaderFactory.wasCalled = false; |
| 334 | + |
| 335 | + // Test setting vectorized reader factory |
| 336 | + Parquet.ReadBuilder readBuilder = |
| 337 | + Parquet.read(Files.localInput(file)) |
| 338 | + .project(schema) |
| 339 | + .createBatchedReaderFunc(fileSchema -> new MockVectorizedReader()) |
| 340 | + .vectorizedReaderFactory(MockVectorizedReaderFactory.class.getName()); |
| 341 | + |
| 342 | + // We can't easily verify the property directly since it's private, |
| 343 | + // but we can verify the build succeeds |
| 344 | + readBuilder.build().iterator(); // Should not throw |
| 345 | + |
| 346 | + // Verify our mock factory was NOT used (because MockVectorizedReaderFactory is not a valid factory) |
| 347 | + assertThat(TestMockVectorizedReaderFactory.wasCalled) |
| 348 | + .as("TestMockVectorizedReaderFactory should not have been called") |
| 349 | + .isFalse(); |
| 350 | + } |
| 351 | + |
| 352 | + @Test |
| 353 | + public void testVectorizedReaderFactoryRemoveWithNull() throws IOException { |
| 354 | + Schema schema = new Schema(optional(1, "intCol", IntegerType.get())); |
| 355 | + File file = createTempFile(temp); |
| 356 | + |
| 357 | + // Write test data |
| 358 | + List<GenericData.Record> records = Lists.newArrayList(); |
| 359 | + org.apache.avro.Schema avroSchema = AvroSchemaUtil.convert(schema.asStruct()); |
| 360 | + GenericData.Record record = new GenericData.Record(avroSchema); |
| 361 | + record.put("intCol", 42); |
| 362 | + records.add(record); |
| 363 | + |
| 364 | + write(file, schema, Collections.emptyMap(), null, records.toArray(new GenericData.Record[] {})); |
| 365 | + |
| 366 | + // Test removing vectorized reader factory with null |
| 367 | + Parquet.ReadBuilder readBuilder = |
| 368 | + Parquet.read(Files.localInput(file)) |
| 369 | + .project(schema) |
| 370 | + .createBatchedReaderFunc(fileSchema -> new MockVectorizedReader()) |
| 371 | + .vectorizedReaderFactory(MockVectorizedReaderFactory.class.getName()) |
| 372 | + .vectorizedReaderFactory(null); // Remove it |
| 373 | + |
| 374 | + // Build should succeed and use default reader |
| 375 | + readBuilder.build().iterator(); // Should not throw |
| 376 | + } |
| 377 | + |
| 378 | + @Test |
| 379 | + public void testVectorizedReaderFactoryMissingClass() throws IOException { |
| 380 | + Schema schema = new Schema(optional(1, "intCol", IntegerType.get())); |
| 381 | + File file = createTempFile(temp); |
| 382 | + |
| 383 | + // Write test data |
| 384 | + List<GenericData.Record> records = Lists.newArrayList(); |
| 385 | + org.apache.avro.Schema avroSchema = AvroSchemaUtil.convert(schema.asStruct()); |
| 386 | + GenericData.Record record = new GenericData.Record(avroSchema); |
| 387 | + record.put("intCol", 42); |
| 388 | + records.add(record); |
| 389 | + |
| 390 | + write(file, schema, Collections.emptyMap(), null, records.toArray(new GenericData.Record[] {})); |
| 391 | + |
| 392 | + // Test with non-existent class - should fall back to default reader |
| 393 | + Parquet.ReadBuilder readBuilder = |
| 394 | + Parquet.read(Files.localInput(file)) |
| 395 | + .project(schema) |
| 396 | + .createBatchedReaderFunc(fileSchema -> new MockVectorizedReader()) |
| 397 | + .vectorizedReaderFactory("com.example.NonExistentFactory"); |
| 398 | + |
| 399 | + // Should not throw - falls back to default reader |
| 400 | + readBuilder.build().iterator(); |
| 401 | + } |
| 402 | + |
| 403 | + @Test |
| 404 | + public void testVectorizedReaderFactoryInvalidClass() throws IOException { |
| 405 | + Schema schema = new Schema(optional(1, "intCol", IntegerType.get())); |
| 406 | + File file = createTempFile(temp); |
| 407 | + |
| 408 | + // Write test data |
| 409 | + List<GenericData.Record> records = Lists.newArrayList(); |
| 410 | + org.apache.avro.Schema avroSchema = AvroSchemaUtil.convert(schema.asStruct()); |
| 411 | + GenericData.Record record = new GenericData.Record(avroSchema); |
| 412 | + record.put("intCol", 42); |
| 413 | + records.add(record); |
| 414 | + |
| 415 | + write(file, schema, Collections.emptyMap(), null, records.toArray(new GenericData.Record[] {})); |
| 416 | + |
| 417 | + // Test with a class that doesn't implement VectorizedParquetReaderFactory |
| 418 | + Parquet.ReadBuilder readBuilder = |
| 419 | + Parquet.read(Files.localInput(file)) |
| 420 | + .project(schema) |
| 421 | + .createBatchedReaderFunc(fileSchema -> new MockVectorizedReader()) |
| 422 | + .vectorizedReaderFactory(InvalidReaderFactory.class.getName()); |
| 423 | + |
| 424 | + // Should not throw - falls back to default reader |
| 425 | + readBuilder.build().iterator(); |
| 426 | + } |
| 427 | + |
| 428 | + @Test |
| 429 | + public void testVectorizedReaderFactoryNoDefaultConstructor() throws IOException { |
| 430 | + Schema schema = new Schema(optional(1, "intCol", IntegerType.get())); |
| 431 | + File file = createTempFile(temp); |
| 432 | + |
| 433 | + // Write test data |
| 434 | + List<GenericData.Record> records = Lists.newArrayList(); |
| 435 | + org.apache.avro.Schema avroSchema = AvroSchemaUtil.convert(schema.asStruct()); |
| 436 | + GenericData.Record record = new GenericData.Record(avroSchema); |
| 437 | + record.put("intCol", 42); |
| 438 | + records.add(record); |
| 439 | + |
| 440 | + write(file, schema, Collections.emptyMap(), null, records.toArray(new GenericData.Record[] {})); |
| 441 | + |
| 442 | + // Test with a class that has no default constructor |
| 443 | + Parquet.ReadBuilder readBuilder = |
| 444 | + Parquet.read(Files.localInput(file)) |
| 445 | + .project(schema) |
| 446 | + .createBatchedReaderFunc(fileSchema -> new MockVectorizedReader()) |
| 447 | + .vectorizedReaderFactory(NoDefaultConstructorFactory.class.getName()); |
| 448 | + |
| 449 | + // Should not throw - falls back to default reader |
| 450 | + readBuilder.build().iterator(); |
| 451 | + } |
| 452 | + |
| 453 | + @Test |
| 454 | + public void testVectorizedReaderFactorySuccessfulLoad() throws IOException { |
| 455 | + Schema schema = new Schema(optional(1, "intCol", IntegerType.get())); |
| 456 | + File file = createTempFile(temp); |
| 457 | + |
| 458 | + // Write test data |
| 459 | + List<GenericData.Record> records = Lists.newArrayList(); |
| 460 | + org.apache.avro.Schema avroSchema = AvroSchemaUtil.convert(schema.asStruct()); |
| 461 | + GenericData.Record record = new GenericData.Record(avroSchema); |
| 462 | + record.put("intCol", 42); |
| 463 | + records.add(record); |
| 464 | + |
| 465 | + write(file, schema, Collections.emptyMap(), null, records.toArray(new GenericData.Record[] {})); |
| 466 | + |
| 467 | + // Reset the flag |
| 468 | + TestMockVectorizedReaderFactory.wasCalled = false; |
| 469 | + |
| 470 | + // Test successful factory loading |
| 471 | + Parquet.ReadBuilder readBuilder = |
| 472 | + Parquet.read(Files.localInput(file)) |
| 473 | + .project(schema) |
| 474 | + .createBatchedReaderFunc(fileSchema -> new MockVectorizedReader()) |
| 475 | + .vectorizedReaderFactory(TestMockVectorizedReaderFactory.class.getName()); |
| 476 | + |
| 477 | + // Build and consume the reader |
| 478 | + Iterator<?> iterator = readBuilder.build().iterator(); |
| 479 | + assertThat(iterator.hasNext()).isTrue(); |
| 480 | + iterator.next(); |
| 481 | + |
| 482 | + // Verify our mock factory was actually used |
| 483 | + assertThat(TestMockVectorizedReaderFactory.wasCalled) |
| 484 | + .as("Mock factory should have been called") |
| 485 | + .isTrue(); |
| 486 | + } |
| 487 | + |
317 | 488 | private Pair<File, Long> generateFile( |
318 | 489 | Function<MessageType, ParquetValueWriter<?>> createWriterFunc, |
319 | 490 | int desiredRecordCount, |
@@ -354,4 +525,74 @@ private Pair<File, Long> generateFile( |
354 | 525 | records.toArray(new GenericData.Record[] {})); |
355 | 526 | return Pair.of(file, size); |
356 | 527 | } |
| 528 | + |
| 529 | + // Test helper classes |
| 530 | + |
  /**
   * A mock {@code VectorizedReader} used as the batched-reader function in these tests. All
   * operations are no-ops; it only needs to satisfy the builder's type requirements.
   */
  public static class MockVectorizedReader implements VectorizedReader<Object> {
    @Override
    public Object read(Object reuse, int numRows) {
      // Always yields null — the tests never consume vectorized batches from this stub.
      return null;
    }

    @Override
    public void setBatchSize(int batchSize) {
      // No-op
    }

    @Override
    public void close() {
      // No-op
    }
  }
| 548 | + |
| 549 | + /** A mock factory class that implements VectorizedParquetReaderFactory for testing. */ |
| 550 | + public static class TestMockVectorizedReaderFactory implements VectorizedParquetReaderFactory { |
| 551 | + static boolean wasCalled = false; |
| 552 | + |
| 553 | + @Override |
| 554 | + public String name() { |
| 555 | + return "test-mock"; |
| 556 | + } |
| 557 | + |
| 558 | + @Override |
| 559 | + @SuppressWarnings("unchecked") |
| 560 | + public <T> org.apache.iceberg.io.CloseableIterable<T> createReader(ReaderParams params) { |
| 561 | + wasCalled = true; |
| 562 | + // Return a simple iterable that provides the mock data |
| 563 | + GenericData.Record record = |
| 564 | + new GenericData.Record(AvroSchemaUtil.convert(params.schema().asStruct(), "table")); |
| 565 | + record.put(0, 42); |
| 566 | + return (org.apache.iceberg.io.CloseableIterable<T>) |
| 567 | + org.apache.iceberg.io.CloseableIterable.withNoopClose(Collections.singletonList(record)); |
| 568 | + } |
| 569 | + } |
| 570 | + |
  /**
   * A mock factory class that deliberately does NOT implement {@code
   * VectorizedParquetReaderFactory}. Used to verify that a loadable-but-incompatible class falls
   * back to the default reader.
   */
  public static class InvalidReaderFactory {
    public InvalidReaderFactory() {}

    public String name() {
      return "invalid";
    }
  }
| 579 | + |
  /**
   * A mock factory class with no default constructor, so reflective instantiation fails. Used to
   * verify that an uninstantiable factory falls back to the default reader.
   */
  public static class NoDefaultConstructorFactory implements VectorizedParquetReaderFactory {
    @SuppressWarnings("unused")
    public NoDefaultConstructorFactory(String unusedParam) {}

    @Override
    public String name() {
      return "no-default";
    }

    @Override
    public <T> org.apache.iceberg.io.CloseableIterable<T> createReader(ReaderParams params) {
      // Never reached — instantiation fails before createReader can be called.
      return null;
    }
  }
| 595 | + |
  /**
   * A simple, loadable marker class. It does not implement {@code VectorizedParquetReaderFactory};
   * tests use its name to exercise the "loadable but not a factory" configuration path.
   */
  public static class MockVectorizedReaderFactory {}
357 | 598 | } |
0 commit comments