3 changes: 3 additions & 0 deletions build.gradle
@@ -731,6 +731,9 @@ project(':iceberg-orc') {
 }

 project(':iceberg-parquet') {
+  test {
+    useJUnitPlatform()
+  }
   dependencies {
     implementation project(path: ':iceberg-bundled-guava', configuration: 'shadow')
     api project(':iceberg-api')
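Note for readers: Gradle's `test` task defaults to the legacy JUnit 4 runner, which does not discover JUnit Jupiter tests, so without `useJUnitPlatform()` the migrated tests in this module would be silently skipped rather than fail. A minimal sketch of the kind of test this flag enables (hypothetical class, not part of the PR):

```java
// Hypothetical smoke test: discovered and run only when the Gradle test task
// is configured with useJUnitPlatform(); under the default JUnit 4 runner it
// would be silently ignored.
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;

class JUnitPlatformSmokeTest {
  @Test
  void runsOnJUnitPlatform() {
    Assertions.assertTrue(true, "executes only under the JUnit Platform");
  }
}
```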
@@ -32,7 +32,7 @@ public class TestParquetReadProjection extends TestReadProjection {
   protected GenericData.Record writeAndRead(
       String desc, Schema writeSchema, Schema readSchema, GenericData.Record record)
       throws IOException {
-    File file = temp.newFile(desc + ".parquet");
+    File file = temp.resolve(desc + ".parquet").toFile();
     file.delete();

     try (FileAppender<GenericData.Record> appender =
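This pattern recurs throughout the PR: JUnit 4's `TemporaryFolder.newFile(name)` creates the file on disk, while a JUnit 5 `@TempDir` `Path` only hands out a directory, and `resolve(name)` is pure path arithmetic. A minimal sketch of the difference, assuming a Jupiter test class (names hypothetical):

```java
import java.io.File;
import java.nio.file.Path;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;

class TempFileIdiomsTest {
  // JUnit 4 equivalent, for comparison:
  //   @Rule public TemporaryFolder temp = new TemporaryFolder();
  //   File file = temp.newFile("x.parquet");   // the file exists on disk afterwards
  @TempDir Path temp; // fresh directory per test, cleaned up automatically

  @Test
  void resolveDoesNotCreateTheFile() {
    File file = temp.resolve("x.parquet").toFile(); // path only, nothing created
    // delete() is effectively a no-op here, kept so Parquet writers that
    // refuse to overwrite an existing file never see a stale one.
    file.delete();
  }
}
```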
176 changes: 88 additions & 88 deletions parquet/src/test/java/org/apache/iceberg/avro/TestReadProjection.java

Large diffs are not rendered by default.

@@ -23,6 +23,7 @@
 import java.io.Closeable;
 import java.io.File;
 import java.io.IOException;
+import java.nio.file.Path;
 import java.util.Collections;
 import java.util.Map;
 import java.util.UUID;
@@ -33,29 +34,25 @@
 import org.apache.iceberg.io.FileAppender;
 import org.apache.iceberg.relocated.com.google.common.collect.Lists;
 import org.apache.parquet.schema.MessageType;
-import org.junit.rules.TemporaryFolder;

 /** Utilities for tests that need to write Parquet files. */
 class ParquetWritingTestUtils {

   private ParquetWritingTestUtils() {}

-  static File writeRecords(TemporaryFolder temp, Schema schema, GenericData.Record... records)
+  static File writeRecords(Path temp, Schema schema, GenericData.Record... records)
       throws IOException {
     return writeRecords(temp, schema, Collections.emptyMap(), null, records);
   }

   static File writeRecords(
-      TemporaryFolder temp,
-      Schema schema,
-      Map<String, String> properties,
-      GenericData.Record... records)
+      Path temp, Schema schema, Map<String, String> properties, GenericData.Record... records)
       throws IOException {
     return writeRecords(temp, schema, properties, null, records);
   }

   static File writeRecords(
-      TemporaryFolder temp,
+      Path temp,
       Schema schema,
       Map<String, String> properties,
       Function<MessageType, ParquetValueWriter<?>> createWriterFunc,
@@ -97,8 +94,8 @@ static long write(
     return len;
   }

-  static File createTempFile(TemporaryFolder temp) throws IOException {
-    File tmpFolder = temp.newFolder("parquet");
+  static File createTempFile(Path temp) throws IOException {
+    File tmpFolder = temp.resolve("parquet").toFile();
     String filename = UUID.randomUUID().toString();
     return new File(tmpFolder, FileFormat.PARQUET.addExtension(filename));
   }
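One behavioral nuance worth flagging as a hedged observation, not a confirmed problem: `TemporaryFolder.newFolder("parquet")` created the directory eagerly, whereas `temp.resolve("parquet").toFile()` never touches the filesystem, so the returned `File` points into a directory that may not exist yet. A standalone sketch of the difference (not from the PR):

```java
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Path;

public class TempDirNuance {
  public static void main(String[] args) throws Exception {
    Path temp = Files.createTempDirectory("iceberg-test");

    // JUnit 5 style: resolve() is path arithmetic only; nothing is created.
    File parquetDir = temp.resolve("parquet").toFile();
    System.out.println("exists after resolve(): " + parquetDir.exists()); // false

    // JUnit 4's TemporaryFolder.newFolder("parquet") created the directory eagerly.
    // With @TempDir, either the writer must create missing parents (Iceberg's
    // Files.localOutput appears to do this on create()) or the test creates them:
    System.out.println("created explicitly: " + parquetDir.mkdirs()); // true
  }
}
```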

Large diffs are not rendered by default.

@@ -22,8 +22,8 @@
 import static org.mockito.Mockito.when;

 import org.apache.parquet.column.statistics.Statistics;
-import org.junit.Assert;
-import org.junit.Test;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;

 /**
  * Tests for Parquet 1.5.0-Stats which cannot be evaluated like later versions of Parquet stats.
@@ -39,6 +39,6 @@ public void testCDHParquetStatistcs() {
     when(cdhBinaryColumnStats.getMaxBytes()).thenReturn(null);
     when(cdhBinaryColumnStats.getMinBytes()).thenReturn(null);
     when(cdhBinaryColumnStats.getNumNulls()).thenReturn(0L);
-    Assert.assertTrue(ParquetMetricsRowGroupFilter.minMaxUndefined(cdhBinaryColumnStats));
+    Assertions.assertTrue(ParquetMetricsRowGroupFilter.minMaxUndefined(cdhBinaryColumnStats));
   }
 }
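For assertions without failure messages, as in this file, the JUnit 4 to 5 move is a drop-in import swap; the call shapes match. A minimal sketch of the equivalence (class and parameter names hypothetical):

```java
// JUnit 4 (before):
//   import org.junit.Assert;
//   Assert.assertTrue(condition);
// JUnit 5 (after): same call shape, different class and package.
import org.junit.jupiter.api.Assertions;

class AssertSwapExample {
  void check(boolean minMaxUndefined) {
    Assertions.assertTrue(minMaxUndefined);
  }
}
```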
27 changes: 13 additions & 14 deletions parquet/src/test/java/org/apache/iceberg/parquet/TestParquet.java
@@ -29,13 +29,13 @@
 import java.io.File;
 import java.io.IOException;
 import java.nio.ByteBuffer;
+import java.nio.file.Path;
 import java.util.Collections;
 import java.util.List;
 import java.util.function.Function;
 import org.apache.avro.generic.GenericData;
 import org.apache.avro.generic.GenericRecord;
 import org.apache.avro.generic.GenericRecordBuilder;
-import org.apache.hadoop.fs.Path;
 import org.apache.iceberg.Files;
 import org.apache.iceberg.Schema;
 import org.apache.iceberg.avro.AvroSchemaUtil;
@@ -49,14 +49,13 @@
 import org.apache.parquet.hadoop.ParquetFileReader;
 import org.apache.parquet.hadoop.ParquetWriter;
 import org.apache.parquet.schema.MessageType;
-import org.junit.Assert;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.rules.TemporaryFolder;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;

 public class TestParquet {

-  @Rule public TemporaryFolder temp = new TemporaryFolder();
+  @TempDir private Path temp;

   @Test
   public void testRowGroupSizeConfigurable() throws IOException {
@@ -68,7 +67,7 @@ public void testRowGroupSizeConfigurable() throws IOException {

     try (ParquetFileReader reader =
         ParquetFileReader.open(ParquetIO.file(localInput(parquetFile)))) {
-      Assert.assertEquals(2, reader.getRowGroups().size());
+      Assertions.assertEquals(2, reader.getRowGroups().size());
     }
   }

@@ -83,7 +82,7 @@ public void testRowGroupSizeConfigurableWithWriter() throws IOException {

     try (ParquetFileReader reader =
         ParquetFileReader.open(ParquetIO.file(localInput(parquetFile)))) {
-      Assert.assertEquals(2, reader.getRowGroups().size());
+      Assertions.assertEquals(2, reader.getRowGroups().size());
     }
   }

@@ -116,7 +115,7 @@ public void testNumberOfBytesWritten() throws IOException {
         records.toArray(new GenericData.Record[] {}));

     long expectedSize = ParquetIO.file(localInput(file)).getLength();
-    Assert.assertEquals(expectedSize, actualSize);
+    Assertions.assertEquals(expectedSize, actualSize);
   }

   @Test
@@ -127,11 +126,11 @@ public void testTwoLevelList() throws IOException {
             optional(2, "topbytes", Types.BinaryType.get()));
     org.apache.avro.Schema avroSchema = AvroSchemaUtil.convert(schema.asStruct());

-    File testFile = temp.newFile();
-    Assert.assertTrue(testFile.delete());
+    File testFile = temp.toFile();
+    Assertions.assertTrue(testFile.delete());

     ParquetWriter<GenericRecord> writer =
-        AvroParquetWriter.<GenericRecord>builder(new Path(testFile.toURI()))
+        AvroParquetWriter.<GenericRecord>builder(new org.apache.hadoop.fs.Path(testFile.toURI()))
             .withDataModel(GenericData.get())
             .withSchema(avroSchema)
             .config("parquet.avro.add-list-element-records", "true")
@@ -154,8 +153,8 @@
         Iterables.getOnlyElement(
             Parquet.read(Files.localInput(testFile)).project(schema).callInit().build());

-    Assert.assertEquals(expectedByteList, recordRead.get("arraybytes"));
-    Assert.assertEquals(expectedBinary, recordRead.get("topbytes"));
+    Assertions.assertEquals(expectedByteList, recordRead.get("arraybytes"));
+    Assertions.assertEquals(expectedBinary, recordRead.get("topbytes"));
   }

   private Pair<File, Long> generateFile(
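For reference, the `@TempDir` injection used above follows the standard Jupiter lifecycle: the field is populated with a fresh directory before each test and deleted recursively afterwards, replacing the `@Rule TemporaryFolder` setup. A minimal standalone sketch (class name hypothetical):

```java
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;

class TempDirLifecycleTest {
  @TempDir Path temp; // injected fresh before each test, removed after it

  @Test
  void tempDirIsRealAndWritable() throws Exception {
    Assertions.assertTrue(Files.isDirectory(temp), "injected directory exists");
    Path data = Files.write(temp.resolve("data.txt"), "hello".getBytes(StandardCharsets.UTF_8));
    Assertions.assertTrue(Files.exists(data), "files can be created inside it");
  }
}
```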
@@ -18,8 +18,11 @@
  */
 package org.apache.iceberg.parquet;

+import static org.apache.iceberg.parquet.ParquetWritingTestUtils.createTempFile;
+
 import java.io.IOException;
 import java.nio.ByteBuffer;
+import java.nio.file.Path;
 import java.util.List;
 import org.apache.iceberg.DataFile;
 import org.apache.iceberg.FileContent;
@@ -43,11 +46,10 @@
 import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
 import org.apache.iceberg.relocated.com.google.common.collect.Lists;
 import org.apache.iceberg.types.Types;
-import org.junit.Assert;
-import org.junit.Before;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.rules.TemporaryFolder;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;

 public class TestParquetDataWriter {
   private static final Schema SCHEMA =
@@ -58,9 +60,9 @@ public class TestParquetDataWriter {

   private List<Record> records;

-  @Rule public TemporaryFolder temp = new TemporaryFolder();
+  @TempDir private Path temp;

-  @Before
+  @BeforeEach
   public void createRecords() {
     GenericRecord record = GenericRecord.create(SCHEMA);

@@ -76,7 +78,7 @@ public void createRecords() {

   @Test
   public void testDataWriter() throws IOException {
-    OutputFile file = Files.localOutput(temp.newFile());
+    OutputFile file = Files.localOutput(createTempFile(temp));

     SortOrder sortOrder = SortOrder.builderFor(SCHEMA).withOrderId(10).asc("id").build();

@@ -99,13 +101,13 @@

     DataFile dataFile = dataWriter.toDataFile();

-    Assert.assertEquals("Format should be Parquet", FileFormat.PARQUET, dataFile.format());
-    Assert.assertEquals("Should be data file", FileContent.DATA, dataFile.content());
-    Assert.assertEquals("Record count should match", records.size(), dataFile.recordCount());
-    Assert.assertEquals("Partition should be empty", 0, dataFile.partition().size());
-    Assert.assertEquals(
-        "Sort order should match", sortOrder.orderId(), (int) dataFile.sortOrderId());
-    Assert.assertNull("Key metadata should be null", dataFile.keyMetadata());
+    Assertions.assertEquals(FileFormat.PARQUET, dataFile.format(), "Format should be Parquet");
+    Assertions.assertEquals(FileContent.DATA, dataFile.content(), "Should be data file");
+    Assertions.assertEquals(records.size(), dataFile.recordCount(), "Record count should match");
+    Assertions.assertEquals(0, dataFile.partition().size(), "Partition should be empty");
+    Assertions.assertEquals(
+        sortOrder.orderId(), (int) dataFile.sortOrderId(), "Sort order should match");
+    Assertions.assertNull(dataFile.keyMetadata(), "Key metadata should be null");

     List<Record> writtenRecords;
     try (CloseableIterable<Record> reader =
@@ -116,17 +118,17 @@
       writtenRecords = Lists.newArrayList(reader);
     }

-    Assert.assertEquals("Written records should match", records, writtenRecords);
+    Assertions.assertEquals(records, writtenRecords, "Written records should match");
   }

   @SuppressWarnings("checkstyle:AvoidEscapedUnicodeCharacters")
   @Test
   public void testInvalidUpperBoundString() throws Exception {
-    OutputFile file = Files.localOutput(temp.newFile());
+    OutputFile file = Files.localOutput(createTempFile(temp));

     Table testTable =
         TestTables.create(
-            temp.newFile(),
+            createTempFile(temp),
             "test_invalid_string_bound",
             SCHEMA,
             PartitionSpec.unpartitioned(),
@@ -167,12 +169,12 @@

     DataFile dataFile = dataWriter.toDataFile();

-    Assert.assertEquals("Format should be Parquet", FileFormat.PARQUET, dataFile.format());
-    Assert.assertEquals("Should be data file", FileContent.DATA, dataFile.content());
-    Assert.assertEquals(
-        "Record count should match", overflowRecords.size(), dataFile.recordCount());
-    Assert.assertEquals("Partition should be empty", 0, dataFile.partition().size());
-    Assert.assertNull("Key metadata should be null", dataFile.keyMetadata());
+    Assertions.assertEquals(FileFormat.PARQUET, dataFile.format(), "Format should be Parquet");
+    Assertions.assertEquals(FileContent.DATA, dataFile.content(), "Should be data file");
+    Assertions.assertEquals(
+        overflowRecords.size(), dataFile.recordCount(), "Record count should match");
+    Assertions.assertEquals(0, dataFile.partition().size(), "Partition should be empty");
+    Assertions.assertNull(dataFile.keyMetadata(), "Key metadata should be null");

     List<Record> writtenRecords;
     try (CloseableIterable<Record> reader =
@@ -183,22 +185,22 @@
       writtenRecords = Lists.newArrayList(reader);
     }

-    Assert.assertEquals("Written records should match", overflowRecords, writtenRecords);
+    Assertions.assertEquals(overflowRecords, writtenRecords, "Written records should match");

-    Assert.assertTrue("Should have a valid lower bound", dataFile.lowerBounds().containsKey(1));
-    Assert.assertTrue("Should have a valid upper bound", dataFile.upperBounds().containsKey(1));
-    Assert.assertTrue("Should have a valid lower bound", dataFile.lowerBounds().containsKey(2));
-    Assert.assertFalse("Should have a null upper bound", dataFile.upperBounds().containsKey(2));
+    Assertions.assertTrue(dataFile.lowerBounds().containsKey(1), "Should have a valid lower bound");
+    Assertions.assertTrue(dataFile.upperBounds().containsKey(1), "Should have a valid upper bound");
+    Assertions.assertTrue(dataFile.lowerBounds().containsKey(2), "Should have a valid lower bound");
+    Assertions.assertFalse(dataFile.upperBounds().containsKey(2), "Should have a null upper bound");
   }

   @SuppressWarnings("checkstyle:AvoidEscapedUnicodeCharacters")
   @Test
   public void testInvalidUpperBoundBinary() throws Exception {
-    OutputFile file = Files.localOutput(temp.newFile());
+    OutputFile file = Files.localOutput(createTempFile(temp));

     Table testTable =
         TestTables.create(
-            temp.newFile(),
+            createTempFile(temp),
             "test_invalid_binary_bound",
             SCHEMA,
             PartitionSpec.unpartitioned(),
@@ -238,12 +240,12 @@

     DataFile dataFile = dataWriter.toDataFile();

-    Assert.assertEquals("Format should be Parquet", FileFormat.PARQUET, dataFile.format());
-    Assert.assertEquals("Should be data file", FileContent.DATA, dataFile.content());
-    Assert.assertEquals(
-        "Record count should match", overflowRecords.size(), dataFile.recordCount());
-    Assert.assertEquals("Partition should be empty", 0, dataFile.partition().size());
-    Assert.assertNull("Key metadata should be null", dataFile.keyMetadata());
+    Assertions.assertEquals(FileFormat.PARQUET, dataFile.format(), "Format should be Parquet");
+    Assertions.assertEquals(FileContent.DATA, dataFile.content(), "Should be data file");
+    Assertions.assertEquals(
+        overflowRecords.size(), dataFile.recordCount(), "Record count should match");
+    Assertions.assertEquals(0, dataFile.partition().size(), "Partition should be empty");
+    Assertions.assertNull(dataFile.keyMetadata(), "Key metadata should be null");

     List<Record> writtenRecords;
     try (CloseableIterable<Record> reader =
@@ -254,11 +256,11 @@
       writtenRecords = Lists.newArrayList(reader);
     }

-    Assert.assertEquals("Written records should match", overflowRecords, writtenRecords);
+    Assertions.assertEquals(overflowRecords, writtenRecords, "Written records should match");

-    Assert.assertTrue("Should have a valid lower bound", dataFile.lowerBounds().containsKey(1));
-    Assert.assertTrue("Should have a valid upper bound", dataFile.upperBounds().containsKey(1));
-    Assert.assertTrue("Should have a valid lower bound", dataFile.lowerBounds().containsKey(3));
-    Assert.assertFalse("Should have a null upper bound", dataFile.upperBounds().containsKey(3));
+    Assertions.assertTrue(dataFile.lowerBounds().containsKey(1), "Should have a valid lower bound");
+    Assertions.assertTrue(dataFile.upperBounds().containsKey(1), "Should have a valid upper bound");
+    Assertions.assertTrue(dataFile.lowerBounds().containsKey(3), "Should have a valid lower bound");
+    Assertions.assertFalse(dataFile.upperBounds().containsKey(3), "Should have a null upper bound");
   }
 }
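Most of this file's churn comes from one mechanical rule: JUnit 4 takes the failure message as the first argument, JUnit 5 takes it as the last. A side-by-side sketch of the reordering (class and parameter names hypothetical):

```java
import org.junit.jupiter.api.Assertions;

class MessageOrderExample {
  void compare(Object expected, Object actual) {
    // JUnit 4:  Assert.assertEquals("Record count should match", expected, actual);
    // JUnit 5: the message moves to the end.
    Assertions.assertEquals(expected, actual, "Record count should match");

    // Pitfall: with three String arguments, a forgotten reorder still compiles
    // but treats the old message as the expected value.
  }
}
```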