Skip to content
This repository was archived by the owner on Jun 15, 2021. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,6 @@ bin/

# Hive/metastore files
metastore_db/

# Python stuff
python/.mypy_cache/
49 changes: 26 additions & 23 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -99,17 +99,11 @@ subprojects {
}
}

apply from: 'baseline.gradle'
apply from: 'deploy.gradle'
apply from: 'tasks.gradle'
apply from: 'jmh.gradle'

project(':iceberg-bundled-guava') {
apply plugin: 'com.github.johnrengelman.shadow'

tasks.assemble.dependsOn tasks.shadowJar
tasks.install.dependsOn tasks.shadowJar


tasks.jar.dependsOn tasks.shadowJar

dependencies {
compileOnly('com.google.guava:guava') {
exclude group: 'com.google.code.findbugs'
Expand All @@ -118,13 +112,9 @@ project(':iceberg-bundled-guava') {
exclude group: 'com.google.j2objc'
}
}

jar {
enabled = false
}


shadowJar {
classifier = null
classifier null
configurations = [project.configurations.compileOnly]
zip64 true

Expand All @@ -144,6 +134,10 @@ project(':iceberg-bundled-guava') {

minimize()
}

jar {
classifier 'empty'
}
}

project(':iceberg-api') {
Expand Down Expand Up @@ -428,12 +422,11 @@ project(':iceberg-pig') {
project(':iceberg-spark-runtime') {
apply plugin: 'com.github.johnrengelman.shadow'

tasks.assemble.dependsOn tasks.shadowJar
tasks.install.dependsOn tasks.shadowJar
tasks.javadocJar.dependsOn tasks.shadowJar
tasks.jar.dependsOn tasks.shadowJar

configurations {
compileOnly {
compile {
exclude group: 'org.apache.spark'
// included in Spark
exclude group: 'org.slf4j'
exclude group: 'org.apache.commons'
Expand All @@ -446,12 +439,12 @@ project(':iceberg-spark-runtime') {
}

dependencies {
compileOnly project(':iceberg-spark')
compile project(':iceberg-spark')
compile 'org.apache.spark:spark-hive_2.11'
}

shadowJar {
// shade compileOnly dependencies to avoid including in transitive dependencies
configurations = [project.configurations.compileOnly]
configurations = [project.configurations.compile]

zip64 true

Expand Down Expand Up @@ -481,7 +474,11 @@ project(':iceberg-spark-runtime') {
relocate 'org.apache.arrow', 'org.apache.iceberg.shaded.org.apache.arrow'
relocate 'com.carrotsearch', 'org.apache.iceberg.shaded.com.carrotsearch'

archiveName = "iceberg-spark-runtime-${version}.${extension}"
classifier null
}

jar {
classifier = 'empty'
}
}

Expand Down Expand Up @@ -519,3 +516,9 @@ String getJavadocVersion() {
throw new Exception("Neither version.txt nor git version exists")
}
}

apply from: 'baseline.gradle'
apply from: 'deploy.gradle'
apply from: 'tasks.gradle'
apply from: 'jmh.gradle'

1 change: 0 additions & 1 deletion bundled-guava/LICENSE
Original file line number Diff line number Diff line change
Expand Up @@ -209,4 +209,3 @@ Copyright: 2006-2019 The Guava Authors
Home page: https://github.com/google/guava
License: http://www.apache.org/licenses/LICENSE-2.0

--------------------------------------------------------------------------------
5 changes: 0 additions & 5 deletions bundled-guava/NOTICE
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,3 @@ Copyright 2017-2020 The Apache Software Foundation
This product includes software developed at
The Apache Software Foundation (http://www.apache.org/).

--------------------------------------------------------------------------------

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a reason we don't mention that it contains Guava in this manner? Just curious.

| This product includes software from Google Guava (Apache 2.0)
| * Copyright (C) 2007 The Guava Authors
| * https://github.com/google/guava
--------------------------------------------------------------------------------
69 changes: 66 additions & 3 deletions core/src/test/java/org/apache/iceberg/TableTestBase.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.stream.LongStream;
import org.apache.iceberg.io.FileIO;
import org.apache.iceberg.io.OutputFile;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
Expand Down Expand Up @@ -87,9 +88,15 @@ public class TableTestBase {
public TestTables.TestTable table = null;

protected final int formatVersion;
@SuppressWarnings("checkstyle:MemberName")
protected final Assertions V1Assert;
@SuppressWarnings("checkstyle:MemberName")
protected final Assertions V2Assert;

public TableTestBase(int formatVersion) {
this.formatVersion = formatVersion;
this.V1Assert = new Assertions(1, formatVersion);
this.V2Assert = new Assertions(2, formatVersion);
}

@Before
Expand Down Expand Up @@ -209,6 +216,14 @@ ManifestEntry manifestEntry(ManifestEntry.Status status, Long snapshotId, DataFi
}

void validateSnapshot(Snapshot old, Snapshot snap, DataFile... newFiles) {
validateSnapshot(old, snap, null, newFiles);
}

void validateSnapshot(Snapshot old, Snapshot snap, long sequenceNumber, DataFile... newFiles) {
validateSnapshot(old, snap, (Long) sequenceNumber, newFiles);
}

void validateSnapshot(Snapshot old, Snapshot snap, Long sequenceNumber, DataFile... newFiles) {
List<ManifestFile> oldManifests = old != null ? old.manifests() : ImmutableList.of();

// copy the manifests to a modifiable list and remove the existing manifests
Expand All @@ -227,6 +242,10 @@ void validateSnapshot(Snapshot old, Snapshot snap, DataFile... newFiles) {

for (ManifestEntry entry : ManifestFiles.read(manifest, FILE_IO).entries()) {
DataFile file = entry.file();
if (sequenceNumber != null) {
V1Assert.assertEquals("Sequence number should default to 0", 0, entry.sequenceNumber().longValue());
V2Assert.assertEquals("Sequence number should match expected", sequenceNumber, entry.sequenceNumber());
}
Assert.assertEquals("Path should match expected", newPaths.next(), file.path().toString());
Assert.assertEquals("File's snapshot ID should match", id, (long) entry.snapshotId());
}
Expand Down Expand Up @@ -254,12 +273,23 @@ List<String> paths(DataFile... dataFiles) {
return paths;
}

static void validateManifest(ManifestFile manifest,
Iterator<Long> ids,
Iterator<DataFile> expectedFiles) {
void validateManifest(ManifestFile manifest,
Iterator<Long> ids,
Iterator<DataFile> expectedFiles) {
validateManifest(manifest, null, ids, expectedFiles);
}

void validateManifest(ManifestFile manifest,
Iterator<Long> seqs,
Iterator<Long> ids,
Iterator<DataFile> expectedFiles) {
for (ManifestEntry entry : ManifestFiles.read(manifest, FILE_IO).entries()) {
DataFile file = entry.file();
DataFile expected = expectedFiles.next();
if (seqs != null) {
V1Assert.assertEquals("Sequence number should default to 0", 0, entry.sequenceNumber().longValue());
V2Assert.assertEquals("Sequence number should match expected", seqs.next(), entry.sequenceNumber());
}
Assert.assertEquals("Path should match expected",
expected.path().toString(), file.path().toString());
Assert.assertEquals("Snapshot ID should match expected ID",
Expand Down Expand Up @@ -292,6 +322,10 @@ static Iterator<ManifestEntry.Status> statuses(ManifestEntry.Status... statuses)
return Iterators.forArray(statuses);
}

static Iterator<Long> seqs(long... seqs) {
return LongStream.of(seqs).iterator();
}

static Iterator<Long> ids(Long... ids) {
return Iterators.forArray(ids);
}
Expand All @@ -303,4 +337,33 @@ static Iterator<DataFile> files(DataFile... files) {
static Iterator<DataFile> files(ManifestFile manifest) {
return ManifestFiles.read(manifest, FILE_IO).iterator();
}

/**
* Used for assertions that only apply if the table version is v2.
*/
protected static class Assertions {
private final boolean enabled;

private Assertions(int validForVersion, int formatVersion) {
this.enabled = validForVersion == formatVersion;
}

void assertEquals(String context, int expected, int actual) {
if (enabled) {
Assert.assertEquals(context, expected, actual);
}
}

void assertEquals(String context, long expected, long actual) {
if (enabled) {
Assert.assertEquals(context, expected, actual);
}
}

void assertEquals(String context, Object expected, Object actual) {
if (enabled) {
Assert.assertEquals(context, expected, actual);
}
}
}
}
54 changes: 40 additions & 14 deletions core/src/test/java/org/apache/iceberg/TestFastAppend.java
Original file line number Diff line number Diff line change
Expand Up @@ -52,13 +52,21 @@ public void testEmptyTableAppend() {

TableMetadata base = readMetadata();
Assert.assertNull("Should not have a current snapshot", base.currentSnapshot());
Assert.assertEquals("Table should start with last-sequence-number 0", 0, base.lastSequenceNumber());

Snapshot pending = table.newFastAppend()
table.newFastAppend()
.appendFile(FILE_A)
.appendFile(FILE_B)
.apply();
.commit();

Snapshot snap = table.currentSnapshot();

validateSnapshot(base.currentSnapshot(), snap, 1, FILE_A, FILE_B);

validateSnapshot(base.currentSnapshot(), pending, FILE_A, FILE_B);
V2Assert.assertEquals("Snapshot sequence number should be 1", 1, snap.sequenceNumber());
V2Assert.assertEquals("Last sequence number should be 1", 1, readMetadata().lastSequenceNumber());

V1Assert.assertEquals("Table should end with last-sequence-number 0", 0, base.lastSequenceNumber());
}

@Test
Expand All @@ -67,17 +75,25 @@ public void testEmptyTableAppendManifest() throws IOException {

TableMetadata base = readMetadata();
Assert.assertNull("Should not have a current snapshot", base.currentSnapshot());
Assert.assertEquals("Table should start with last-sequence-number 0", 0, base.lastSequenceNumber());

ManifestFile manifest = writeManifest(FILE_A, FILE_B);
Snapshot pending = table.newFastAppend()
table.newFastAppend()
.appendManifest(manifest)
.apply();
.commit();

validateSnapshot(base.currentSnapshot(), pending, FILE_A, FILE_B);
Snapshot snap = table.currentSnapshot();

validateSnapshot(base.currentSnapshot(), snap, 1, FILE_A, FILE_B);

// validate that the metadata summary is correct when using appendManifest
Assert.assertEquals("Summary metadata should include 2 added files",
"2", pending.summary().get("added-data-files"));
"2", snap.summary().get("added-data-files"));

V2Assert.assertEquals("Snapshot sequence number should be 1", 1, snap.sequenceNumber());
V2Assert.assertEquals("Last sequence number should be 1", 1, readMetadata().lastSequenceNumber());

V1Assert.assertEquals("Table should end with last-sequence-number 0", 0, base.lastSequenceNumber());
}

@Test
Expand All @@ -86,22 +102,32 @@ public void testEmptyTableAppendFilesAndManifest() throws IOException {

TableMetadata base = readMetadata();
Assert.assertNull("Should not have a current snapshot", base.currentSnapshot());
Assert.assertEquals("Table should start with last-sequence-number 0", 0, base.lastSequenceNumber());

ManifestFile manifest = writeManifest(FILE_A, FILE_B);
Snapshot pending = table.newFastAppend()
table.newFastAppend()
.appendFile(FILE_C)
.appendFile(FILE_D)
.appendManifest(manifest)
.apply();
.commit();

Snapshot snap = table.currentSnapshot();

long pendingId = pending.snapshotId();
long commitId = snap.snapshotId();

validateManifest(pending.manifests().get(0),
ids(pendingId, pendingId),
validateManifest(snap.manifests().get(0),
seqs(1, 1),
ids(commitId, commitId),
files(FILE_C, FILE_D));
validateManifest(pending.manifests().get(1),
ids(pendingId, pendingId),
validateManifest(snap.manifests().get(1),
seqs(1, 1),
ids(commitId, commitId),
files(FILE_A, FILE_B));

V2Assert.assertEquals("Snapshot sequence number should be 1", 1, snap.sequenceNumber());
V2Assert.assertEquals("Last sequence number should be 1", 1, readMetadata().lastSequenceNumber());

V1Assert.assertEquals("Table should end with last-sequence-number 0", 0, base.lastSequenceNumber());
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,8 @@ public ParquetValueReader<?> primitive(org.apache.iceberg.types.Type.PrimitiveTy
}
case TIME_MICROS:
return new TimeReader(desc);
case TIME_MILLIS:
return new TimeMillisReader(desc);
case DECIMAL:
DecimalMetadata decimal = primitive.getDecimalMetadata();
switch (primitive.getPrimitiveTypeName()) {
Expand Down Expand Up @@ -356,6 +358,17 @@ public OffsetDateTime read(OffsetDateTime reuse) {
}
}

private static class TimeMillisReader extends PrimitiveReader<LocalTime> {
private TimeMillisReader(ColumnDescriptor desc) {
super(desc);
}

@Override
public LocalTime read(LocalTime reuse) {
return LocalTime.ofNanoOfDay(column.nextLong() * 1000000L);
}
}

private static class TimeReader extends PrimitiveReader<LocalTime> {
private TimeReader(ColumnDescriptor desc) {
super(desc);
Expand Down
6 changes: 5 additions & 1 deletion deploy.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,11 @@ subprojects {
publishing {
publications {
apache(MavenPublication) {
from components.java
if (tasks.matching({task -> task.name == 'shadowJar'}).isEmpty()) {
from components.java
} else {
project.shadow.component(it)
}

artifact sourceJar
artifact javadocJar
Expand Down
Loading