diff --git a/api/src/main/java/org/apache/iceberg/ContentFile.java b/api/src/main/java/org/apache/iceberg/ContentFile.java
index 262ea1733027..11d4a4da26e9 100644
--- a/api/src/main/java/org/apache/iceberg/ContentFile.java
+++ b/api/src/main/java/org/apache/iceberg/ContentFile.java
@@ -29,6 +29,11 @@
  * @param <F> the concrete Java class of a ContentFile instance.
  */
 public interface ContentFile<F> {
+  /**
+   * @return id of the partition spec used for partition metadata
+   */
+  int specId();
+
   /**
    * @return type of content stored in the file; one of DATA, POSITION_DELETES, or EQUALITY_DELETES
    */
diff --git a/api/src/test/java/org/apache/iceberg/TestHelpers.java b/api/src/test/java/org/apache/iceberg/TestHelpers.java
index b4881b3e256a..c1fce01f3f1a 100644
--- a/api/src/test/java/org/apache/iceberg/TestHelpers.java
+++ b/api/src/test/java/org/apache/iceberg/TestHelpers.java
@@ -311,6 +311,11 @@ public TestDataFile(String path, StructLike partition, long recordCount,
       this.upperBounds = upperBounds;
     }

+    @Override
+    public int specId() {
+      return 0;
+    }
+
     @Override
     public CharSequence path() {
       return path;
diff --git a/core/src/main/java/org/apache/iceberg/AllManifestsTable.java b/core/src/main/java/org/apache/iceberg/AllManifestsTable.java
index 819433883e09..367826f914e2 100644
--- a/core/src/main/java/org/apache/iceberg/AllManifestsTable.java
+++ b/core/src/main/java/org/apache/iceberg/AllManifestsTable.java
@@ -128,8 +128,13 @@ protected CloseableIterable<FileScanTask> planFiles(
         if (snap.manifestListLocation() != null) {
           Expression filter = ignoreResiduals ? Expressions.alwaysTrue() : rowFilter;
           ResidualEvaluator residuals = ResidualEvaluator.unpartitioned(filter);
+          DataFile manifestListAsDataFile = DataFiles.builder(PartitionSpec.unpartitioned())
+              .withInputFile(ops.io().newInputFile(snap.manifestListLocation()))
+              .withRecordCount(1)
+              .withFormat(FileFormat.AVRO)
+              .build();
           return new ManifestListReadTask(ops.io(), table().spec(), new BaseFileScanTask(
-              DataFiles.fromManifestList(ops.io().newInputFile(snap.manifestListLocation())), null,
+              manifestListAsDataFile, null,
               schemaString, specString, residuals));
         } else {
           return StaticDataTask.of(
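The new `specId()` accessor exposes, for each data or delete file, the ID of the partition spec its partition tuple was written with, so readers can resolve the correct spec from the table's spec map instead of assuming the current one. A minimal sketch of that lookup during a scan (the `Table` handle and the printing are illustrative, not part of this patch):

```java
import java.io.IOException;
import org.apache.iceberg.DataFile;
import org.apache.iceberg.FileScanTask;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.Table;
import org.apache.iceberg.io.CloseableIterable;

public class SpecIdScan {
  static void printFileSpecs(Table table) throws IOException {
    try (CloseableIterable<FileScanTask> tasks = table.newScan().planFiles()) {
      for (FileScanTask task : tasks) {
        DataFile file = task.file();
        // resolve the spec the file was written with, which may differ from table.spec()
        PartitionSpec spec = table.specs().get(file.specId());
        System.out.println(file.path() + " -> spec " + spec.specId());
      }
    }
  }
}
```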
diff --git a/core/src/main/java/org/apache/iceberg/BaseFile.java b/core/src/main/java/org/apache/iceberg/BaseFile.java
index a408c7a4df6c..db8320e00610 100644
--- a/core/src/main/java/org/apache/iceberg/BaseFile.java
+++ b/core/src/main/java/org/apache/iceberg/BaseFile.java
@@ -53,6 +53,7 @@ public PartitionData copy() {
   private int[] fromProjectionPos;
   private Types.StructType partitionType;

+  private int partitionSpecId = -1;
   private FileContent content = FileContent.DATA;
   private String filePath = null;
   private FileFormat format = null;
@@ -108,11 +109,12 @@ public PartitionData copy() {
     this.partitionData = new PartitionData(partitionType);
   }

-  BaseFile(FileContent content, String filePath, FileFormat format,
+  BaseFile(int specId, FileContent content, String filePath, FileFormat format,
            PartitionData partition, long fileSizeInBytes, long recordCount,
            Map<Integer, Long> columnSizes, Map<Integer, Long> valueCounts, Map<Integer, Long> nullValueCounts,
            Map<Integer, ByteBuffer> lowerBounds, Map<Integer, ByteBuffer> upperBounds,
            List<Long> splitOffsets, ByteBuffer keyMetadata) {
+    this.partitionSpecId = specId;
     this.content = content;
     this.filePath = filePath;
     this.format = format;
@@ -145,6 +147,7 @@ public PartitionData copy() {
    * @param fullCopy whether to copy all fields or to drop column-level stats
    */
   BaseFile(BaseFile<F> toCopy, boolean fullCopy) {
+    this.partitionSpecId = toCopy.partitionSpecId;
     this.content = toCopy.content;
     this.filePath = toCopy.filePath;
     this.format = toCopy.format;
@@ -178,6 +181,15 @@ public PartitionData copy() {
   BaseFile() {
   }

+  @Override
+  public int specId() {
+    return partitionSpecId;
+  }
+
+  void setSpecId(int specId) {
+    this.partitionSpecId = specId;
+  }
+
   protected abstract Schema getAvroSchema(Types.StructType partitionStruct);

   @Override
diff --git a/core/src/main/java/org/apache/iceberg/DataFiles.java b/core/src/main/java/org/apache/iceberg/DataFiles.java
index feb23039f726..b9fae62b97e3 100644
--- a/core/src/main/java/org/apache/iceberg/DataFiles.java
+++ b/core/src/main/java/org/apache/iceberg/DataFiles.java
@@ -94,88 +94,27 @@ public static PartitionData copy(PartitionSpec spec, StructLike partition) {
     return copyPartitionData(spec, partition, null);
   }

-  public static DataFile fromStat(FileStatus stat, long rowCount) {
-    String location = stat.getPath().toString();
-    FileFormat format = FileFormat.fromFileName(location);
-    return new GenericDataFile(location, format, rowCount, stat.getLen());
-  }
-
-  public static DataFile fromStat(FileStatus stat, PartitionData partition, long rowCount) {
-    String location = stat.getPath().toString();
-    FileFormat format = FileFormat.fromFileName(location);
-    return new GenericDataFile(
-        location, format, partition, rowCount, stat.getLen());
-  }
-
-  public static DataFile fromStat(FileStatus stat, PartitionData partition, Metrics metrics,
-                                  EncryptionKeyMetadata keyMetadata, List<Long> splitOffsets) {
-    String location = stat.getPath().toString();
-    FileFormat format = FileFormat.fromFileName(location);
-    return new GenericDataFile(
-        location, format, partition, stat.getLen(), metrics, keyMetadata.buffer(), splitOffsets);
-  }
-
-  public static DataFile fromInputFile(InputFile file, PartitionData partition, long rowCount) {
-    if (file instanceof HadoopInputFile) {
-      return fromStat(((HadoopInputFile) file).getStat(), partition, rowCount);
-    }
-
-    String location = file.location();
-    FileFormat format = FileFormat.fromFileName(location);
-    return new GenericDataFile(
-        location, format, partition, rowCount, file.getLength());
-  }
-
-  public static DataFile fromInputFile(InputFile file, long rowCount) {
-    if (file instanceof HadoopInputFile) {
-      return fromStat(((HadoopInputFile) file).getStat(), rowCount);
-    }
-
-    String location = file.location();
-    FileFormat format = FileFormat.fromFileName(location);
-    return new GenericDataFile(location, format, rowCount, file.getLength());
-  }
-
-  public static DataFile fromEncryptedOutputFile(EncryptedOutputFile encryptedFile, PartitionData partition,
-                                                 Metrics metrics, List<Long> splitOffsets) {
-    EncryptionKeyMetadata keyMetadata = encryptedFile.keyMetadata();
-    InputFile file = encryptedFile.encryptingOutputFile().toInputFile();
-    if (encryptedFile instanceof HadoopInputFile) {
-      return fromStat(((HadoopInputFile) file).getStat(), partition, metrics, keyMetadata, splitOffsets);
-    }
-
-    String location = file.location();
-    FileFormat format = FileFormat.fromFileName(location);
-    return new GenericDataFile(
-        location, format, partition, file.getLength(), metrics, keyMetadata.buffer(), splitOffsets);
-  }
-
   public static DataFile fromManifest(ManifestFile manifest) {
     Preconditions.checkArgument(
         manifest.addedFilesCount() != null && manifest.existingFilesCount() != null,
         "Cannot create data file from manifest: data file counts are missing.");

-    return new GenericDataFile(manifest.path(),
-        FileFormat.AVRO,
-        manifest.addedFilesCount() + manifest.existingFilesCount(),
-        manifest.length());
-  }
-
-  public static DataFile fromManifestList(InputFile manifestList) {
-    return new GenericDataFile(manifestList.location(), FileFormat.AVRO, 1, manifestList.getLength());
+    return DataFiles.builder(PartitionSpec.unpartitioned())
+        .withPath(manifest.path())
+        .withFormat(FileFormat.AVRO)
+        .withRecordCount(manifest.addedFilesCount() + manifest.existingFilesCount())
+        .withFileSizeInBytes(manifest.length())
+        .build();
   }

   public static Builder builder(PartitionSpec spec) {
     return new Builder(spec);
   }

-  static Builder builder() {
-    return new Builder();
-  }
-
   public static class Builder {
     private final PartitionSpec spec;
     private final boolean isPartitioned;
+    private final int specId;
     private PartitionData partitionData;
     private String filePath = null;
     private FileFormat format = null;
@@ -191,14 +130,9 @@ public static class Builder {
     private ByteBuffer keyMetadata = null;
     private List<Long> splitOffsets = null;

-    public Builder() {
-      this.spec = null;
-      this.partitionData = null;
-      this.isPartitioned = false;
-    }
-
     public Builder(PartitionSpec spec) {
       this.spec = spec;
+      this.specId = spec.specId();
       this.isPartitioned = spec.fields().size() > 0;
       this.partitionData = isPartitioned ? newPartitionData(spec) : null;
     }
@@ -221,6 +155,7 @@ public void clear() {

     public Builder copy(DataFile toCopy) {
       if (isPartitioned) {
+        Preconditions.checkState(specId == toCopy.specId(), "Cannot copy a DataFile with a different spec");
         this.partitionData = copyPartitionData(spec, toCopy.partition(), partitionData);
       }
       this.filePath = toCopy.path().toString();
@@ -338,7 +273,7 @@ public DataFile build() {
       Preconditions.checkArgument(recordCount >= 0, "Record count is required");

       return new GenericDataFile(
-          filePath, format, isPartitioned ? partitionData.copy() : null,
+          specId, filePath, format, isPartitioned ? partitionData.copy() : null,
           fileSizeInBytes, new Metrics(
               recordCount, columnSizes, valueCounts, nullValueCounts, lowerBounds, upperBounds),
           keyMetadata, splitOffsets);
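With the spec-less `fromStat`/`fromInputFile`/`fromManifestList` factories and the no-arg builder removed, every `DataFile` is now constructed against an explicit `PartitionSpec`, which is where its spec ID comes from. A sketch of the migration for the common unpartitioned case (the path and count are made-up values, not from this patch):

```java
import org.apache.iceberg.DataFile;
import org.apache.iceberg.DataFiles;
import org.apache.iceberg.Files;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.Table;

public class BuilderMigration {
  static void appendUnpartitioned(Table table, String avroPath, long recordCount) {
    // before: table.newAppend().appendFile(DataFiles.fromInputFile(Files.localInput(avroPath), recordCount))
    DataFile file = DataFiles.builder(PartitionSpec.unpartitioned())
        .withInputFile(Files.localInput(avroPath)) // sets path and length; format is inferred from the name
        .withRecordCount(recordCount)
        .build(); // file.specId() reports the unpartitioned spec's ID

    table.newAppend()
        .appendFile(file)
        .commit();
  }
}
```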
diff --git a/core/src/main/java/org/apache/iceberg/FileMetadata.java b/core/src/main/java/org/apache/iceberg/FileMetadata.java
index 854cf5745e49..7ff31e0768a4 100644
--- a/core/src/main/java/org/apache/iceberg/FileMetadata.java
+++ b/core/src/main/java/org/apache/iceberg/FileMetadata.java
@@ -38,13 +38,10 @@ public static Builder deleteFileBuilder(PartitionSpec spec) {
     return new Builder(spec);
   }

-  static Builder deleteFileBuilder() {
-    return new Builder();
-  }
-
   public static class Builder {
     private final PartitionSpec spec;
     private final boolean isPartitioned;
+    private final int specId;
     private FileContent content = null;
     private PartitionData partitionData;
     private String filePath = null;
@@ -60,14 +57,9 @@ public static class Builder {
     private Map<Integer, ByteBuffer> upperBounds = null;
     private ByteBuffer keyMetadata = null;

-    Builder() {
-      this.spec = null;
-      this.partitionData = null;
-      this.isPartitioned = false;
-    }
-
     Builder(PartitionSpec spec) {
       this.spec = spec;
+      this.specId = spec.specId();
       this.isPartitioned = spec.fields().size() > 0;
       this.partitionData = isPartitioned ? DataFiles.newPartitionData(spec) : null;
     }
@@ -89,6 +81,7 @@ public void clear() {

     public Builder copy(DeleteFile toCopy) {
       if (isPartitioned) {
+        Preconditions.checkState(specId == toCopy.specId(), "Cannot copy a DeleteFile with a different spec");
         this.partitionData = DataFiles.copyPartitionData(spec, toCopy.partition(), partitionData);
       }
       this.content = toCopy.content();
@@ -208,7 +201,7 @@ public DeleteFile build() {
       Preconditions.checkArgument(recordCount >= 0, "Record count is required");

       return new GenericDeleteFile(
-          content, filePath, format, isPartitioned ? DataFiles.copy(spec, partitionData) : null,
+          specId, content, filePath, format, isPartitioned ? DataFiles.copy(spec, partitionData) : null,
           fileSizeInBytes, new Metrics(
               recordCount, columnSizes, valueCounts, nullValueCounts, lowerBounds, upperBounds),
           keyMetadata);
diff --git a/core/src/main/java/org/apache/iceberg/GenericDataFile.java b/core/src/main/java/org/apache/iceberg/GenericDataFile.java
index 1871790be24a..1075c63fb551 100644
--- a/core/src/main/java/org/apache/iceberg/GenericDataFile.java
+++ b/core/src/main/java/org/apache/iceberg/GenericDataFile.java
@@ -34,26 +34,10 @@ class GenericDataFile extends BaseFile<DataFile> implements DataFile {
     super(avroSchema);
   }

-  GenericDataFile(String filePath, FileFormat format, long recordCount,
-                  long fileSizeInBytes) {
-    this(filePath, format, null, recordCount, fileSizeInBytes);
-  }
-
-  GenericDataFile(String filePath, FileFormat format, PartitionData partition,
-                  long recordCount, long fileSizeInBytes) {
-    super(FileContent.DATA, filePath, format, partition, fileSizeInBytes, recordCount,
-        null, null, null, null, null, null, null);
-  }
-
-  GenericDataFile(String filePath, FileFormat format, PartitionData partition,
-                  long fileSizeInBytes, Metrics metrics, List<Long> splitOffsets) {
-    this(filePath, format, partition, fileSizeInBytes, metrics, null, splitOffsets);
-  }
-
-  GenericDataFile(String filePath, FileFormat format, PartitionData partition,
+  GenericDataFile(int specId, String filePath, FileFormat format, PartitionData partition,
                   long fileSizeInBytes, Metrics metrics,
                   ByteBuffer keyMetadata, List<Long> splitOffsets) {
-    super(FileContent.DATA, filePath, format, partition, fileSizeInBytes, metrics.recordCount(),
+    super(specId, FileContent.DATA, filePath, format, partition, fileSizeInBytes, metrics.recordCount(),
         metrics.columnSizes(), metrics.valueCounts(), metrics.nullValueCounts(),
         metrics.lowerBounds(), metrics.upperBounds(), splitOffsets, keyMetadata);
   }
diff --git a/core/src/main/java/org/apache/iceberg/GenericDeleteFile.java b/core/src/main/java/org/apache/iceberg/GenericDeleteFile.java
index 6f9b6379c01c..0ad76a9f028b 100644
--- a/core/src/main/java/org/apache/iceberg/GenericDeleteFile.java
+++ b/core/src/main/java/org/apache/iceberg/GenericDeleteFile.java
@@ -34,9 +34,9 @@ class GenericDeleteFile extends BaseFile<DeleteFile> implements DeleteFile {
     super(avroSchema);
   }

-  GenericDeleteFile(FileContent content, String filePath, FileFormat format, PartitionData partition,
+  GenericDeleteFile(int specId, FileContent content, String filePath, FileFormat format, PartitionData partition,
                     long fileSizeInBytes, Metrics metrics, ByteBuffer keyMetadata) {
-    super(content, filePath, format, partition, fileSizeInBytes, metrics.recordCount(),
+    super(specId, content, filePath, format, partition, fileSizeInBytes, metrics.recordCount(),
         metrics.columnSizes(), metrics.valueCounts(), metrics.nullValueCounts(),
         metrics.lowerBounds(), metrics.upperBounds(), null, keyMetadata);
   }
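Because both builders capture `spec.specId()` at construction, `copy()` can now fail fast instead of silently re-stamping a file from another spec. A hedged sketch of the new guard (schema, specs, and paths invented for illustration):

```java
import org.apache.iceberg.DataFile;
import org.apache.iceberg.DataFiles;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.Schema;
import org.apache.iceberg.types.Types;

public class CopyGuardSketch {
  public static void main(String[] args) {
    Schema schema = new Schema(
        Types.NestedField.required(1, "id", Types.LongType.get()),
        Types.NestedField.required(2, "data", Types.StringType.get()));
    // two specs over the same schema, each with its own spec ID
    PartitionSpec dataSpec = PartitionSpec.builderFor(schema).withSpecId(0).bucket("data", 16).build();
    PartitionSpec idSpec = PartitionSpec.builderFor(schema).withSpecId(1).bucket("id", 8).build();

    DataFile file = DataFiles.builder(dataSpec)
        .withPath("/path/to/data-a.parquet")
        .withPartitionPath("data_bucket=0")
        .withFileSizeInBytes(10)
        .withRecordCount(1)
        .build(); // file.specId() == 0

    try {
      // copying into a builder for a different partitioned spec now throws
      DataFiles.builder(idSpec).copy(file);
    } catch (IllegalStateException e) {
      System.out.println(e.getMessage()); // Cannot copy a DataFile with a different spec
    }
  }
}
```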
diff --git a/core/src/main/java/org/apache/iceberg/InheritableMetadataFactory.java b/core/src/main/java/org/apache/iceberg/InheritableMetadataFactory.java
index d8ceb40da3a9..ad396ea9bf38 100644
--- a/core/src/main/java/org/apache/iceberg/InheritableMetadataFactory.java
+++ b/core/src/main/java/org/apache/iceberg/InheritableMetadataFactory.java
@@ -34,7 +34,7 @@ static InheritableMetadata empty() {
   static InheritableMetadata fromManifest(ManifestFile manifest) {
     Preconditions.checkArgument(manifest.snapshotId() != null,
         "Cannot read from ManifestFile with null (unassigned) snapshot ID");
-    return new BaseInheritableMetadata(manifest.snapshotId(), manifest.sequenceNumber());
+    return new BaseInheritableMetadata(manifest.partitionSpecId(), manifest.snapshotId(), manifest.sequenceNumber());
   }

   static InheritableMetadata forCopy(long snapshotId) {
@@ -42,16 +42,22 @@
   }

   static class BaseInheritableMetadata implements InheritableMetadata {
+    private final int specId;
     private final long snapshotId;
     private final long sequenceNumber;

-    private BaseInheritableMetadata(long snapshotId, long sequenceNumber) {
+    private BaseInheritableMetadata(int specId, long snapshotId, long sequenceNumber) {
+      this.specId = specId;
       this.snapshotId = snapshotId;
       this.sequenceNumber = sequenceNumber;
     }

     @Override
     public <F extends ContentFile<F>> ManifestEntry<F> apply(ManifestEntry<F> manifestEntry) {
+      if (manifestEntry.file() instanceof BaseFile) {
+        BaseFile<?> file = (BaseFile<?>) manifestEntry.file();
+        file.setSpecId(specId);
+      }
       if (manifestEntry.snapshotId() == null) {
         manifestEntry.setSnapshotId(snapshotId);
       }
diff --git a/core/src/main/java/org/apache/iceberg/StaticDataTask.java b/core/src/main/java/org/apache/iceberg/StaticDataTask.java
index 11ce15635472..24bff01e3da7 100644
--- a/core/src/main/java/org/apache/iceberg/StaticDataTask.java
+++ b/core/src/main/java/org/apache/iceberg/StaticDataTask.java
@@ -42,7 +42,7 @@ static <T> DataTask of(InputFile metadata, Iterable<T> values, Function
   private final StructLike[] rows;

   private StaticDataTask(InputFile metadata, StructLike[] rows) {
-    this.metadataFile = DataFiles.builder()
+    this.metadataFile = DataFiles.builder(PartitionSpec.unpartitioned())
         .withInputFile(metadata)
         .withRecordCount(rows.length)
         .withFormat(FileFormat.METADATA)
diff --git a/core/src/main/java/org/apache/iceberg/V1Metadata.java b/core/src/main/java/org/apache/iceberg/V1Metadata.java
index 5615ee182fc9..cb6c922349f9 100644
--- a/core/src/main/java/org/apache/iceberg/V1Metadata.java
+++ b/core/src/main/java/org/apache/iceberg/V1Metadata.java
@@ -364,6 +364,11 @@ public org.apache.avro.Schema getSchema() {
       return avroSchema;
     }

+    @Override
+    public int specId() {
+      return wrapped.specId();
+    }
+
     @Override
     public FileContent content() {
       return wrapped.content();
diff --git a/core/src/main/java/org/apache/iceberg/V2Metadata.java b/core/src/main/java/org/apache/iceberg/V2Metadata.java
index c3d3d5e9bf03..c4999508ddb0 100644
--- a/core/src/main/java/org/apache/iceberg/V2Metadata.java
+++ b/core/src/main/java/org/apache/iceberg/V2Metadata.java
@@ -412,6 +412,11 @@ public void put(int i, Object v) {
       throw new UnsupportedOperationException("Cannot read into IndexedDataFile");
     }

+    @Override
+    public int specId() {
+      return wrapped.specId();
+    }
+
     @Override
     public FileContent content() {
       return wrapped.content();
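Note that the spec ID is not stored per entry in manifests; like snapshot IDs and sequence numbers, it is inherited at read time from the manifest's own `partition-spec-id` via `InheritableMetadata`. A sketch of the resulting invariant, assuming a reader wired up through the existing `ManifestFiles.read` entry point:

```java
import java.io.IOException;
import org.apache.iceberg.DataFile;
import org.apache.iceberg.ManifestFile;
import org.apache.iceberg.ManifestFiles;
import org.apache.iceberg.ManifestReader;
import org.apache.iceberg.io.FileIO;

public class SpecInheritanceCheck {
  static void verify(FileIO io, ManifestFile manifest) throws IOException {
    try (ManifestReader<DataFile> reader = ManifestFiles.read(manifest, io)) {
      for (DataFile file : reader) {
        // BaseInheritableMetadata#apply stamped the manifest's spec ID onto each file
        if (file.specId() != manifest.partitionSpecId()) {
          throw new IllegalStateException("Spec ID was not inherited for " + file.path());
        }
      }
    }
  }
}
```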
diff --git a/core/src/test/java/org/apache/iceberg/TestManifestWriterVersions.java b/core/src/test/java/org/apache/iceberg/TestManifestWriterVersions.java
index 32e2d9a86596..16215394c7b9 100644
--- a/core/src/test/java/org/apache/iceberg/TestManifestWriterVersions.java
+++ b/core/src/test/java/org/apache/iceberg/TestManifestWriterVersions.java
@@ -68,10 +68,10 @@ public class TestManifestWriterVersions {
   private static final List<Long> OFFSETS = ImmutableList.of(4L);

   private static final DataFile DATA_FILE = new GenericDataFile(
-      PATH, FORMAT, PARTITION, 150972L, METRICS, null, OFFSETS);
+      0, PATH, FORMAT, PARTITION, 150972L, METRICS, null, OFFSETS);

   private static final DeleteFile DELETE_FILE = new GenericDeleteFile(
-      FileContent.EQUALITY_DELETES, PATH, FORMAT, PARTITION, 22905L, METRICS, null);
+      0, FileContent.EQUALITY_DELETES, PATH, FORMAT, PARTITION, 22905L, METRICS, null);

   @Rule
   public TemporaryFolder temp = new TemporaryFolder();
diff --git a/core/src/test/java/org/apache/iceberg/TestMergeAppend.java b/core/src/test/java/org/apache/iceberg/TestMergeAppend.java
index e88f9bf12c6b..dc4780263ecd 100644
--- a/core/src/test/java/org/apache/iceberg/TestMergeAppend.java
+++ b/core/src/test/java/org/apache/iceberg/TestMergeAppend.java
@@ -681,13 +681,15 @@ public void testChangedPartitionSpec() {
     V2Assert.assertEquals("Last sequence number should be 1", 1, readMetadata().lastSequenceNumber());
     V1Assert.assertEquals("Table should end with last-sequence-number 0", 0, readMetadata().lastSequenceNumber());

-    DataFile newFileC = DataFiles.builder(newSpec)
-        .copy(FILE_C)
+    DataFile newFileY = DataFiles.builder(newSpec)
+        .withPath("/path/to/data-y.parquet")
+        .withFileSizeInBytes(10)
         .withPartitionPath("data_bucket=2/id_bucket=3")
+        .withRecordCount(1)
         .build();

     table.newAppend()
-        .appendFile(newFileC)
+        .appendFile(newFileY)
         .commit();

     Snapshot lastSnapshot = table.currentSnapshot();
@@ -702,7 +704,7 @@
     validateManifest(lastSnapshot.allManifests().get(0),
         seqs(2),
         ids(lastSnapshot.snapshotId()),
-        files(newFileC),
+        files(newFileY),
         statuses(Status.ADDED)
     );

@@ -747,13 +749,15 @@
     V2Assert.assertEquals("Last sequence number should be 2", 2, readMetadata().lastSequenceNumber());
     V1Assert.assertEquals("Table should end with last-sequence-number 0", 0, readMetadata().lastSequenceNumber());

-    DataFile newFileC = DataFiles.builder(newSpec)
-        .copy(FILE_C)
+    DataFile newFileY = DataFiles.builder(newSpec)
+        .withPath("/path/to/data-y.parquet")
+        .withFileSizeInBytes(10)
         .withPartitionPath("data_bucket=2/id_bucket=3")
+        .withRecordCount(1)
         .build();

     table.newAppend()
-        .appendFile(newFileC)
+        .appendFile(newFileY)
         .commit();
     Snapshot lastSnapshot = table.currentSnapshot();
     V2Assert.assertEquals("Snapshot sequence number should be 3", 3, lastSnapshot.sequenceNumber());
@@ -768,7 +772,7 @@
     validateManifest(lastSnapshot.allManifests().get(0),
         seqs(3),
         ids(lastSnapshot.snapshotId()),
-        files(newFileC),
+        files(newFileY),
         statuses(Status.ADDED)
     );
     validateManifest(lastSnapshot.allManifests().get(1),
@@ -1129,8 +1133,12 @@ public void testManifestEntryFieldIdsForChangedPartitionSpecForV1Table() {
     V2Assert.assertEquals("Last sequence number should be 1", 1, readMetadata().lastSequenceNumber());
     V1Assert.assertEquals("Table should end with last-sequence-number 0", 0, readMetadata().lastSequenceNumber());

+    // create a new data file with the table's current spec
     DataFile newFile = DataFiles.builder(table.spec())
-        .copy(FILE_B)
+        .withPath("/path/to/data-x.parquet")
+        .withFileSizeInBytes(10)
+        .withPartitionPath("id_bucket=1/data_bucket=1")
+        .withRecordCount(1)
         .build();

     table.newAppend()
diff --git a/core/src/test/java/org/apache/iceberg/TestRewriteManifests.java b/core/src/test/java/org/apache/iceberg/TestRewriteManifests.java
index 7a2c5fbc7df1..5c179eedf935 100644
--- a/core/src/test/java/org/apache/iceberg/TestRewriteManifests.java
+++ b/core/src/test/java/org/apache/iceberg/TestRewriteManifests.java
@@ -579,22 +579,26 @@ public void testWithMultiplePartitionSpec() throws IOException {
     // commit the new partition spec to the table manually
     table.ops().commit(base, base.updatePartitionSpec(newSpec));

-    DataFile newFileC = DataFiles.builder(newSpec)
-        .copy(FILE_C)
+    DataFile newFileY = DataFiles.builder(newSpec)
+        .withPath("/path/to/data-y.parquet")
+        .withFileSizeInBytes(10)
         .withPartitionPath("data_bucket=2/id_bucket=3")
+        .withRecordCount(1)
         .build();

     table.newAppend()
-        .appendFile(newFileC)
+        .appendFile(newFileY)
         .commit();

-    DataFile newFileD = DataFiles.builder(newSpec)
-        .copy(FILE_D)
+    DataFile newFileZ = DataFiles.builder(newSpec)
+        .withPath("/path/to/data-z.parquet")
+        .withFileSizeInBytes(10)
         .withPartitionPath("data_bucket=2/id_bucket=4")
+        .withRecordCount(1)
         .build();

     table.newAppend()
-        .appendFile(newFileD)
+        .appendFile(newFileZ)
         .commit();

     Assert.assertEquals("Should use 3 manifest files",
@@ -648,22 +652,26 @@ public void testManifestSizeWithMultiplePartitionSpec() throws IOException {
     // commit the new partition spec to the table manually
     table.ops().commit(base, base.updatePartitionSpec(newSpec));

-    DataFile newFileC = DataFiles.builder(newSpec)
-        .copy(FILE_C)
+    DataFile newFileY = DataFiles.builder(newSpec)
+        .withPath("/path/to/data-y.parquet")
+        .withFileSizeInBytes(10)
         .withPartitionPath("data_bucket=2/id_bucket=3")
+        .withRecordCount(1)
         .build();

     table.newAppend()
-        .appendFile(newFileC)
+        .appendFile(newFileY)
         .commit();

-    DataFile newFileD = DataFiles.builder(newSpec)
-        .copy(FILE_D)
+    DataFile newFileZ = DataFiles.builder(newSpec)
+        .withPath("/path/to/data-z.parquet")
+        .withFileSizeInBytes(10)
         .withPartitionPath("data_bucket=2/id_bucket=4")
+        .withRecordCount(1)
         .build();

     table.newAppend()
-        .appendFile(newFileD)
+        .appendFile(newFileZ)
         .commit();

     Assert.assertEquals("Rewrite manifests should produce 3 manifest files",
diff --git a/data/src/test/java/org/apache/iceberg/data/TestLocalScan.java b/data/src/test/java/org/apache/iceberg/data/TestLocalScan.java
index 10ace6736488..5acd2a32ea44 100644
--- a/data/src/test/java/org/apache/iceberg/data/TestLocalScan.java
+++ b/data/src/test/java/org/apache/iceberg/data/TestLocalScan.java
@@ -64,7 +64,6 @@
 import org.junit.runner.RunWith;
 import org.junit.runners.Parameterized;

-import static org.apache.iceberg.DataFiles.fromInputFile;
 import static org.apache.iceberg.expressions.Expressions.equal;
 import static org.apache.iceberg.expressions.Expressions.lessThan;
 import static org.apache.iceberg.expressions.Expressions.lessThanOrEqual;
@@ -255,7 +254,11 @@ public void testRandomData() throws IOException {
       }

       writeFile(location.toString(), format.addExtension("file-" + fileNum), records);
-      append.appendFile(fromInputFile(HadoopInputFile.fromPath(path, CONF), numRecords));
+      DataFile file = DataFiles.builder(PartitionSpec.unpartitioned())
+          .withRecordCount(numRecords)
+          .withInputFile(HadoopInputFile.fromPath(path, CONF))
+          .build();
+      append.appendFile(file);

       fileNum += 1;
     }
diff --git a/spark/src/main/java/org/apache/iceberg/spark/SparkDataFile.java b/spark/src/main/java/org/apache/iceberg/spark/SparkDataFile.java
index fe7c00401fc1..48dd00124273 100644
--- a/spark/src/main/java/org/apache/iceberg/spark/SparkDataFile.java
+++ b/spark/src/main/java/org/apache/iceberg/spark/SparkDataFile.java
@@ -87,6 +87,11 @@ public SparkDataFile wrap(Row row) {
     return this;
   }

+  @Override
+  public int specId() {
+    return -1;
+  }
+
   @Override
   public CharSequence path() {
     return wrapped.getAs(filePathPosition);
diff --git a/spark/src/test/java/org/apache/iceberg/spark/source/TestPartitionValues.java b/spark/src/test/java/org/apache/iceberg/spark/source/TestPartitionValues.java
index c46b19166ae5..3a60167a5f4e 100644
--- a/spark/src/test/java/org/apache/iceberg/spark/source/TestPartitionValues.java
+++ b/spark/src/test/java/org/apache/iceberg/spark/source/TestPartitionValues.java
@@ -265,7 +265,10 @@ public void testPartitionValueTypes() throws Exception {

     // add the Avro data file to the source table
     source.newAppend()
-        .appendFile(DataFiles.fromInputFile(Files.localInput(avroData), 10))
+        .appendFile(DataFiles.builder(PartitionSpec.unpartitioned())
+            .withRecordCount(10)
+            .withInputFile(Files.localInput(avroData))
+            .build())
         .commit();

     Dataset<Row> sourceDF = spark.read().format("iceberg")
@@ -330,7 +333,10 @@ public void testNestedPartitionValues() throws Exception {

     // add the Avro data file to the source table
     source.newAppend()
-        .appendFile(DataFiles.fromInputFile(Files.localInput(avroData), 10))
+        .appendFile(DataFiles.builder(PartitionSpec.unpartitioned())
+            .withRecordCount(10)
+            .withInputFile(Files.localInput(avroData))
+            .build())
         .commit();

     Dataset<Row> sourceDF = spark.read().format("iceberg")
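One caveat for the Spark wrapper: `SparkDataFile` adapts a Spark `Row` that carries no spec information, so its `specId()` stub returns -1. Callers that resolve specs should treat an unknown ID defensively; a hedged helper sketch (the fallback policy is an assumption, not part of this patch):

```java
import org.apache.iceberg.ContentFile;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.Table;

public class SpecLookup {
  // returns the spec the file reports, falling back to the table's current
  // spec when the wrapper (e.g. SparkDataFile) reports an unknown ID (-1)
  static PartitionSpec specFor(Table table, ContentFile<?> file) {
    PartitionSpec spec = table.specs().get(file.specId());
    return spec != null ? spec : table.spec();
  }
}
```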