diff --git a/.github/workflows/delta-conversion-ci.yml b/.github/workflows/delta-conversion-ci.yml
index 08ec5f0c32a8..ec72fee6a2b9 100644
--- a/.github/workflows/delta-conversion-ci.yml
+++ b/.github/workflows/delta-conversion-ci.yml
@@ -59,7 +59,7 @@ jobs:
     runs-on: ubuntu-22.04
     strategy:
       matrix:
-        jvm: [8, 11]
+        jvm: [8, 11, 17]
     env:
       SPARK_LOCAL_IP: localhost
     steps:
@@ -88,7 +88,7 @@ jobs:
     runs-on: ubuntu-22.04
     strategy:
       matrix:
-        jvm: [ 8, 11 ]
+        jvm: [8, 11, 17]
     env:
       SPARK_LOCAL_IP: localhost
     steps:
diff --git a/.github/workflows/hive-ci.yml b/.github/workflows/hive-ci.yml
index db5b9816c034..11e4e3e9303d 100644
--- a/.github/workflows/hive-ci.yml
+++ b/.github/workflows/hive-ci.yml
@@ -57,7 +57,7 @@ jobs:
     runs-on: ubuntu-22.04
     strategy:
      matrix:
-        jvm: [8, 11]
+        jvm: [8, 11, 17]
     env:
       SPARK_LOCAL_IP: localhost
     steps:
diff --git a/.github/workflows/java-ci.yml b/.github/workflows/java-ci.yml
index 970dbe5ea5ca..19b20bcb07cf 100644
--- a/.github/workflows/java-ci.yml
+++ b/.github/workflows/java-ci.yml
@@ -54,7 +54,7 @@ jobs:
     runs-on: ubuntu-22.04
     strategy:
       matrix:
-        jvm: [8, 11]
+        jvm: [8, 11, 17]
     env:
       SPARK_LOCAL_IP: localhost
     steps:
@@ -71,7 +71,7 @@
           key: ${{ runner.os }}-gradle-${{ hashFiles('**/*.gradle*', '**/gradle-wrapper.properties') }}
           restore-keys: ${{ runner.os }}-gradle-
       - run: echo -e "$(ip addr show eth0 | grep "inet\b" | awk '{print $2}' | cut -d/ -f1)\t$(hostname -f) $(hostname -s)" | sudo tee -a /etc/hosts
-      - run: ./gradlew check -DsparkVersions= -DhiveVersions= -DflinkVersions= -Pquick=true -x javadoc
+      - run: ./gradlew check -DsparkVersions= -DhiveVersions= -DflinkVersions= -Pquick=true -x javadoc
       - uses: actions/upload-artifact@v3
         if: failure()
         with:
diff --git a/.github/workflows/spark-ci.yml b/.github/workflows/spark-ci.yml
index f01456930dde..b360e0f95eb5 100644
--- a/.github/workflows/spark-ci.yml
+++ b/.github/workflows/spark-ci.yml
@@ -90,7 +90,7 @@ jobs:
     strategy:
       matrix:
         jvm: [8, 11]
-        spark: ['3.2','3.3', '3.4']
+        spark: ['3.2','3.3','3.4']
     env:
       SPARK_LOCAL_IP: localhost
     steps:
@@ -114,3 +114,33 @@ jobs:
           name: test logs
           path: |
             **/build/testlogs
+
+  spark-3x-java-17-tests:
+    runs-on: ubuntu-22.04
+    strategy:
+      matrix:
+        spark: ['3.3','3.4']
+        scala-version: ['2.12', '2.13']
+    env:
+      SPARK_LOCAL_IP: localhost
+    steps:
+      - uses: actions/checkout@v3
+      - uses: actions/setup-java@v3
+        with:
+          distribution: zulu
+          java-version: 17
+      - uses: actions/cache@v3
+        with:
+          path: |
+            ~/.gradle/caches
+            ~/.gradle/wrapper
+          key: ${{ runner.os }}-gradle-${{ hashFiles('**/*.gradle*', '**/gradle-wrapper.properties') }}
+          restore-keys: ${{ runner.os }}-gradle-
+      - run: echo -e "$(ip addr show eth0 | grep "inet\b" | awk '{print $2}' | cut -d/ -f1)\t$(hostname -f) $(hostname -s)" | sudo tee -a /etc/hosts
+      - run: ./gradlew -DsparkVersions=${{ matrix.spark }} -DscalaVersion=${{ matrix.scala-version }} -DhiveVersions= -DflinkVersions= :iceberg-spark:iceberg-spark-${{ matrix.spark }}_${{ matrix.scala-version }}:check :iceberg-spark:iceberg-spark-extensions-${{ matrix.spark }}_${{ matrix.scala-version }}:check :iceberg-spark:iceberg-spark-runtime-${{ matrix.spark }}_${{ matrix.scala-version }}:check -Pquick=true -x javadoc
+      - uses: actions/upload-artifact@v3
+        if: failure()
+        with:
+          name: test logs
+          path: |
+            **/build/testlogs
\ No newline at end of file
diff --git a/api/src/main/java/org/apache/iceberg/expressions/ExpressionUtil.java b/api/src/main/java/org/apache/iceberg/expressions/ExpressionUtil.java
index aa36fb51b7e9..7eb61cc14e69 100644
--- a/api/src/main/java/org/apache/iceberg/expressions/ExpressionUtil.java
+++ b/api/src/main/java/org/apache/iceberg/expressions/ExpressionUtil.java
@@ -45,12 +45,12 @@ public class ExpressionUtil {
   private static final long THREE_DAYS_IN_HOURS = TimeUnit.DAYS.toHours(3);
   private static final long NINETY_DAYS_IN_HOURS = TimeUnit.DAYS.toHours(90);
   private static final Pattern DATE = Pattern.compile("\\d{4}-\\d{2}-\\d{2}");
-  private static final Pattern TIME = Pattern.compile("\\d{2}:\\d{2}(:\\d{2}(.\\d{1,6})?)?");
+  private static final Pattern TIME = Pattern.compile("\\d{2}:\\d{2}(:\\d{2}(.\\d{1,9})?)?");
   private static final Pattern TIMESTAMP =
-      Pattern.compile("\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}(:\\d{2}(.\\d{1,6})?)?");
+      Pattern.compile("\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}(:\\d{2}(.\\d{1,9})?)?");
   private static final Pattern TIMESTAMPTZ =
       Pattern.compile(
-          "\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}(:\\d{2}(.\\d{1,6})?)?([-+]\\d{2}:\\d{2}|Z)");
+          "\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}(:\\d{2}(.\\d{1,9})?)?([-+]\\d{2}:\\d{2}|Z)");
   static final int LONG_IN_PREDICATE_ABBREVIATION_THRESHOLD = 10;
   private static final int LONG_IN_PREDICATE_ABBREVIATION_MIN_GAIN = 5;
diff --git a/api/src/test/java/org/apache/iceberg/expressions/TestExpressionUtil.java b/api/src/test/java/org/apache/iceberg/expressions/TestExpressionUtil.java
index 770a9df13a90..a64b3299c148 100644
--- a/api/src/test/java/org/apache/iceberg/expressions/TestExpressionUtil.java
+++ b/api/src/test/java/org/apache/iceberg/expressions/TestExpressionUtil.java
@@ -30,6 +30,7 @@
 import org.apache.iceberg.Schema;
 import org.apache.iceberg.relocated.com.google.common.collect.Lists;
 import org.apache.iceberg.types.Types;
+import org.apache.iceberg.util.DateTimeUtil;
 import org.assertj.core.api.Assertions;
 import org.junit.Assert;
 import org.junit.Test;
@@ -314,14 +315,17 @@ public void testSanitizeDate() {
 
   @Test
   public void testSanitizeTime() {
+    long micros = DateTimeUtil.microsFromTimestamptz(OffsetDateTime.now()) / 1000000;
+    String currentTime = DateTimeUtil.microsToIsoTime(micros);
+
     assertEquals(
         Expressions.equal("test", "(time)"),
-        ExpressionUtil.sanitize(Expressions.equal("test", "23:49:51")));
+        ExpressionUtil.sanitize(Expressions.equal("test", currentTime)));
 
     Assert.assertEquals(
         "Sanitized string should be identical except for descriptive literal",
         "test = (time)",
-        ExpressionUtil.toSanitizedString(Expressions.equal("test", "23:49:51")));
+        ExpressionUtil.toSanitizedString(Expressions.equal("test", currentTime)));
   }
 
   @Test
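The hunks above widen the fractional-second part of the TIME, TIMESTAMP, and TIMESTAMPTZ patterns from `(.\d{1,6})?` to `(.\d{1,9})?`, and the test now derives its literal from the current clock instead of a fixed string. Presumably this is because newer JDKs, including 17, expose a higher-resolution clock, so literals rendered from `now()` can carry up to nine fractional digits that the old pattern would not recognize. A minimal standalone sketch of what the widened pattern accepts; the class name and sample strings are made up, only the regex is taken from the patch:

```java
import java.util.regex.Pattern;

public class TimestampPatternCheck {
  // Copied from the patched ExpressionUtil: fractional seconds may be 1 to 9 digits.
  private static final Pattern TIMESTAMP =
      Pattern.compile("\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}(:\\d{2}(.\\d{1,9})?)?");

  public static void main(String[] args) {
    String microPrecision = "2023-05-01T12:34:56.123456";   // matched before and after the change
    String nanoPrecision = "2023-05-01T12:34:56.123456789"; // matched only with {1,9}
    System.out.println(TIMESTAMP.matcher(microPrecision).matches()); // true
    System.out.println(TIMESTAMP.matcher(nanoPrecision).matches());  // true
  }
}
```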
"--add-opens", "java.base/java.util.concurrent.atomic=ALL-UNNAMED", + "--add-opens", "java.base/java.util.concurrent=ALL-UNNAMED", + "--add-opens", "java.base/java.util.regex=ALL-UNNAMED", + "--add-opens", "java.base/java.util=ALL-UNNAMED", + "--add-opens", "java.base/jdk.internal.ref=ALL-UNNAMED", + "--add-opens", "java.base/jdk.internal.reflect=ALL-UNNAMED", + "--add-opens", "java.sql/java.sql=ALL-UNNAMED", + "--add-opens", "java.base/sun.util.calendar=ALL-UNNAMED", + "--add-opens", "java.base/sun.nio.ch=ALL-UNNAMED", + "--add-opens", "java.base/sun.nio.cs=ALL-UNNAMED", + "--add-opens", "java.base/sun.security.action=ALL-UNNAMED", + "--add-opens", "java.base/sun.util.calendar=ALL-UNNAMED"] } else { - throw new GradleException("This build must be run with JDK 8 or 11 but was executed with JDK " + JavaVersion.current()) + throw new GradleException("This build must be run with JDK 8 or 11 or 17 but was executed with JDK " + JavaVersion.current()) } apply plugin: 'com.gorylenko.gradle-git-properties' @@ -217,6 +243,8 @@ subprojects { maxHeapSize = "1500m" + jvmArgs += project.property('extraJvmArgs') + testLogging { events "failed" exceptionFormat "full" @@ -490,6 +518,7 @@ project(':iceberg-aws') { task integrationTest(type: Test) { testClassesDirs = sourceSets.integration.output.classesDirs classpath = sourceSets.integration.runtimeClasspath + jvmArgs += project.property('extraJvmArgs') } } @@ -558,6 +587,7 @@ project(':iceberg-delta-lake') { task integrationTest(type: Test) { testClassesDirs = sourceSets.integration.output.classesDirs classpath = sourceSets.integration.runtimeClasspath + jvmArgs += project.property('extraJvmArgs') } check.dependsOn integrationTest } diff --git a/jmh.gradle b/jmh.gradle index 31d544838b3b..bd411af395d4 100644 --- a/jmh.gradle +++ b/jmh.gradle @@ -17,8 +17,8 @@ * under the License. */ -if (jdkVersion != '8' && jdkVersion != '11') { - throw new GradleException("The JMH benchamrks must be run with JDK 8 or JDK 11") +if (jdkVersion != '8' && jdkVersion != '11' && jdkVersion != '17') { + throw new GradleException("The JMH benchamrks must be run with JDK 8 or JDK 11 or JDK 17") } def sparkVersions = (System.getProperty("sparkVersions") != null ? 
diff --git a/spark/v3.3/build.gradle b/spark/v3.3/build.gradle
index 6dff38e3821f..875a7fe2ca51 100644
--- a/spark/v3.3/build.gradle
+++ b/spark/v3.3/build.gradle
@@ -275,6 +275,7 @@ project(":iceberg-spark:iceberg-spark-runtime-${sparkMajorVersion}_${scalaVersio
   task integrationTest(type: Test) {
     description = "Test Spark3 Runtime Jar against Spark ${sparkMajorVersion}"
     group = "verification"
+    jvmArgs += project.property('extraJvmArgs')
     testClassesDirs = sourceSets.integration.output.classesDirs
     classpath = sourceSets.integration.runtimeClasspath + files(shadowJar.archiveFile.get().asFile.path)
     inputs.file(shadowJar.archiveFile.get().asFile.path)
diff --git a/spark/v3.3/spark/src/test/java/org/apache/iceberg/spark/source/TestMetadataTableReadableMetrics.java b/spark/v3.3/spark/src/test/java/org/apache/iceberg/spark/source/TestMetadataTableReadableMetrics.java
index 343943b0f891..416d5eed5b65 100644
--- a/spark/v3.3/spark/src/test/java/org/apache/iceberg/spark/source/TestMetadataTableReadableMetrics.java
+++ b/spark/v3.3/spark/src/test/java/org/apache/iceberg/spark/source/TestMetadataTableReadableMetrics.java
@@ -26,6 +26,7 @@
 import java.nio.ByteBuffer;
 import java.util.Base64;
 import java.util.List;
+import java.util.Map;
 import org.apache.iceberg.DataFile;
 import org.apache.iceberg.Files;
 import org.apache.iceberg.PartitionSpec;
@@ -191,31 +192,82 @@ private GenericRecord createNestedRecord(Long longCol, Double doubleCol) {
 
   @Test
   public void testPrimitiveColumns() throws Exception {
-    createPrimitiveTable();
+    Table table = createPrimitiveTable();
+    DataFile dataFile = table.currentSnapshot().addedDataFiles(table.io()).iterator().next();
+    Map<Integer, Long> columnSizeStats = dataFile.columnSizes();
 
     Object[] binaryCol =
         row(
-            59L,
+            columnSizeStats.get(PRIMITIVE_SCHEMA.findField("binaryCol").fieldId()),
             4L,
             2L,
             null,
             Base64.getDecoder().decode("1111"),
             Base64.getDecoder().decode("2222"));
-    Object[] booleanCol = row(44L, 4L, 0L, null, false, true);
-    Object[] decimalCol = row(97L, 4L, 1L, null, new BigDecimal("1.00"), new BigDecimal("2.00"));
-    Object[] doubleCol = row(99L, 4L, 0L, 1L, 1.0D, 2.0D);
+    Object[] booleanCol =
+        row(
+            columnSizeStats.get(PRIMITIVE_SCHEMA.findField("booleanCol").fieldId()),
+            4L,
+            0L,
+            null,
+            false,
+            true);
+    Object[] decimalCol =
+        row(
+            columnSizeStats.get(PRIMITIVE_SCHEMA.findField("decimalCol").fieldId()),
+            4L,
+            1L,
+            null,
+            new BigDecimal("1.00"),
+            new BigDecimal("2.00"));
+    Object[] doubleCol =
+        row(
+            columnSizeStats.get(PRIMITIVE_SCHEMA.findField("doubleCol").fieldId()),
+            4L,
+            0L,
+            1L,
+            1.0D,
+            2.0D);
     Object[] fixedCol =
         row(
-            55L,
+            columnSizeStats.get(PRIMITIVE_SCHEMA.findField("fixedCol").fieldId()),
             4L,
             2L,
             null,
             Base64.getDecoder().decode("1111"),
             Base64.getDecoder().decode("2222"));
-    Object[] floatCol = row(90L, 4L, 0L, 2L, 0f, 0f);
-    Object[] intCol = row(91L, 4L, 0L, null, 1, 2);
-    Object[] longCol = row(91L, 4L, 0L, null, 1L, 2L);
-    Object[] stringCol = row(99L, 4L, 0L, null, "1", "2");
+    Object[] floatCol =
+        row(
+            columnSizeStats.get(PRIMITIVE_SCHEMA.findField("floatCol").fieldId()),
+            4L,
+            0L,
+            2L,
+            0f,
+            0f);
+    Object[] intCol =
+        row(
+            columnSizeStats.get(PRIMITIVE_SCHEMA.findField("intCol").fieldId()),
+            4L,
+            0L,
+            null,
+            1,
+            2);
+    Object[] longCol =
+        row(
+            columnSizeStats.get(PRIMITIVE_SCHEMA.findField("longCol").fieldId()),
+            4L,
+            0L,
+            null,
+            1L,
+            2L);
+    Object[] stringCol =
+        row(
+            columnSizeStats.get(PRIMITIVE_SCHEMA.findField("stringCol").fieldId()),
+            4L,
+            0L,
+            null,
+            "1",
+            "2");
 
     Object[] metrics =
         row(
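In both Spark modules, testPrimitiveColumns previously asserted hard-coded on-disk column sizes (59L, 44L, 97L, and so on); presumably those byte counts vary with the JDK and Parquet versions in play, so the test now reads the recorded sizes from the data file it just wrote. A hypothetical helper (the class and method names are mine; the API calls are the ones the patch itself uses) showing the lookup in isolation:

```java
import java.util.Map;
import org.apache.iceberg.DataFile;
import org.apache.iceberg.Schema;
import org.apache.iceberg.Table;

// Sketch only: mirrors what the updated test does inline for each column.
final class ColumnSizeLookup {
  private ColumnSizeLookup() {}

  /** On-disk size recorded for one column of the first data file added by the current snapshot. */
  static Long recordedColumnSize(Table table, Schema schema, String columnName) {
    DataFile dataFile = table.currentSnapshot().addedDataFiles(table.io()).iterator().next();
    Map<Integer, Long> columnSizes = dataFile.columnSizes();
    return columnSizes.get(schema.findField(columnName).fieldId());
  }
}
```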
diff --git a/spark/v3.4/build.gradle b/spark/v3.4/build.gradle
index 5cf131098742..bbd60f74b7d9 100644
--- a/spark/v3.4/build.gradle
+++ b/spark/v3.4/build.gradle
@@ -275,6 +275,7 @@ project(":iceberg-spark:iceberg-spark-runtime-${sparkMajorVersion}_${scalaVersio
   task integrationTest(type: Test) {
     description = "Test Spark3 Runtime Jar against Spark ${sparkMajorVersion}"
     group = "verification"
+    jvmArgs += project.property('extraJvmArgs')
     testClassesDirs = sourceSets.integration.output.classesDirs
     classpath = sourceSets.integration.runtimeClasspath + files(shadowJar.archiveFile.get().asFile.path)
     inputs.file(shadowJar.archiveFile.get().asFile.path)
diff --git a/spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/source/TestMetadataTableReadableMetrics.java b/spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/source/TestMetadataTableReadableMetrics.java
index 343943b0f891..416d5eed5b65 100644
--- a/spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/source/TestMetadataTableReadableMetrics.java
+++ b/spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/source/TestMetadataTableReadableMetrics.java
@@ -26,6 +26,7 @@
 import java.nio.ByteBuffer;
 import java.util.Base64;
 import java.util.List;
+import java.util.Map;
 import org.apache.iceberg.DataFile;
 import org.apache.iceberg.Files;
 import org.apache.iceberg.PartitionSpec;
@@ -191,31 +192,82 @@ private GenericRecord createNestedRecord(Long longCol, Double doubleCol) {
 
   @Test
   public void testPrimitiveColumns() throws Exception {
-    createPrimitiveTable();
+    Table table = createPrimitiveTable();
+    DataFile dataFile = table.currentSnapshot().addedDataFiles(table.io()).iterator().next();
+    Map<Integer, Long> columnSizeStats = dataFile.columnSizes();
 
     Object[] binaryCol =
         row(
-            59L,
+            columnSizeStats.get(PRIMITIVE_SCHEMA.findField("binaryCol").fieldId()),
             4L,
             2L,
             null,
             Base64.getDecoder().decode("1111"),
             Base64.getDecoder().decode("2222"));
-    Object[] booleanCol = row(44L, 4L, 0L, null, false, true);
-    Object[] decimalCol = row(97L, 4L, 1L, null, new BigDecimal("1.00"), new BigDecimal("2.00"));
-    Object[] doubleCol = row(99L, 4L, 0L, 1L, 1.0D, 2.0D);
+    Object[] booleanCol =
+        row(
+            columnSizeStats.get(PRIMITIVE_SCHEMA.findField("booleanCol").fieldId()),
+            4L,
+            0L,
+            null,
+            false,
+            true);
+    Object[] decimalCol =
+        row(
+            columnSizeStats.get(PRIMITIVE_SCHEMA.findField("decimalCol").fieldId()),
+            4L,
+            1L,
+            null,
+            new BigDecimal("1.00"),
+            new BigDecimal("2.00"));
+    Object[] doubleCol =
+        row(
+            columnSizeStats.get(PRIMITIVE_SCHEMA.findField("doubleCol").fieldId()),
+            4L,
+            0L,
+            1L,
+            1.0D,
+            2.0D);
     Object[] fixedCol =
         row(
-            55L,
+            columnSizeStats.get(PRIMITIVE_SCHEMA.findField("fixedCol").fieldId()),
             4L,
             2L,
             null,
             Base64.getDecoder().decode("1111"),
             Base64.getDecoder().decode("2222"));
-    Object[] floatCol = row(90L, 4L, 0L, 2L, 0f, 0f);
-    Object[] intCol = row(91L, 4L, 0L, null, 1, 2);
-    Object[] longCol = row(91L, 4L, 0L, null, 1L, 2L);
-    Object[] stringCol = row(99L, 4L, 0L, null, "1", "2");
+    Object[] floatCol =
+        row(
+            columnSizeStats.get(PRIMITIVE_SCHEMA.findField("floatCol").fieldId()),
+            4L,
+            0L,
+            2L,
+            0f,
+            0f);
+    Object[] intCol =
+        row(
+            columnSizeStats.get(PRIMITIVE_SCHEMA.findField("intCol").fieldId()),
+            4L,
+            0L,
+            null,
+            1,
+            2);
+    Object[] longCol =
+        row(
+            columnSizeStats.get(PRIMITIVE_SCHEMA.findField("longCol").fieldId()),
+            4L,
+            0L,
+            null,
+            1L,
+            2L);
+    Object[] stringCol =
+        row(
+            columnSizeStats.get(PRIMITIVE_SCHEMA.findField("stringCol").fieldId()),
+            4L,
+            0L,
+            null,
+            "1",
+            "2");
 
     Object[] metrics =
         row(