6 changes: 0 additions & 6 deletions .github/labeler.yml
@@ -130,12 +130,6 @@ MR:
      'mr/**/*'
    ]
 
-PIG:
-  - changed-files:
-      - any-glob-to-any-file: [
-        'pig/**/*'
-      ]
-
 AWS:
   - changed-files:
       - any-glob-to-any-file: [
1 change: 0 additions & 1 deletion .github/workflows/delta-conversion-ci.yml
@@ -53,7 +53,6 @@ on:
      - 'hive-runtime/**'
      - 'flink/**'
      - 'kafka-connect/**'
-      - 'pig/**'
      - 'docs/**'
      - 'site/**'
      - 'open-api/**'
1 change: 0 additions & 1 deletion .github/workflows/flink-ci.yml
@@ -53,7 +53,6 @@ on:
      - 'hive-runtime/**'
      - 'kafka-connect/**'
      - 'spark/**'
-      - 'pig/**'
      - 'docs/**'
      - 'site/**'
      - 'open-api/**'
1 change: 0 additions & 1 deletion .github/workflows/hive-ci.yml
@@ -51,7 +51,6 @@ on:
      - 'spark/**'
      - 'flink/**'
      - 'kafka-connect/**'
-      - 'pig/**'
      - 'docs/**'
      - 'site/**'
      - 'open-api/**'
1 change: 0 additions & 1 deletion .github/workflows/kafka-connect-ci.yml
@@ -53,7 +53,6 @@ on:
      - 'hive3-orc-bundle/**'
      - 'hive-runtime/**'
      - 'spark/**'
-      - 'pig/**'
      - 'docs/**'
      - 'site/**'
      - 'open-api/**'
1 change: 0 additions & 1 deletion .github/workflows/spark-ci.yml
@@ -54,7 +54,6 @@ on:
      - 'hive-runtime/**'
      - 'flink/**'
      - 'kafka-connect/**'
-      - 'pig/**'
      - 'docs/**'
      - 'open-api/**'
      - 'format/**'
1 change: 0 additions & 1 deletion README.md
@@ -74,7 +74,6 @@ Iceberg also has modules for adding Iceberg support to processing engines:
 * `iceberg-spark` is an implementation of Spark's Datasource V2 API for Iceberg with submodules for each Spark version (use runtime jars for a shaded version)
 * `iceberg-flink` contains classes for integrating with Apache Flink (use iceberg-flink-runtime for a shaded version)
 * `iceberg-mr` contains an InputFormat and other classes for integrating with Apache Hive
-* `iceberg-pig` is an implementation of Pig's LoadFunc API for Iceberg
 
 ---
 **NOTE**
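The `iceberg-pig` bullet removed above referred to an implementation of Pig's `LoadFunc` API. For readers unfamiliar with that contract, here is a minimal skeleton of the interface iceberg-pig implemented; the class name and method bodies are illustrative placeholders, not the removed code:

```java
import java.io.IOException;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.pig.LoadFunc;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigSplit;
import org.apache.pig.data.Tuple;

// Illustrative skeleton only; the real iceberg-pig loader was more complete.
public class IcebergLoadFuncSketch extends LoadFunc {
  @Override
  public void setLocation(String location, Job job) throws IOException {
    // Resolve the Iceberg table behind `location` and configure the scan.
  }

  @Override
  public InputFormat getInputFormat() throws IOException {
    // Return an InputFormat whose splits correspond to Iceberg scan tasks.
    throw new UnsupportedOperationException("sketch only");
  }

  @Override
  public void prepareToRead(RecordReader reader, PigSplit split) throws IOException {
    // Keep the reader; getNext() pulls records from it.
  }

  @Override
  public Tuple getNext() throws IOException {
    // Convert the next Iceberg record to a Pig Tuple; null signals end of input.
    return null;
  }
}
```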
33 changes: 0 additions & 33 deletions build.gradle
@@ -838,39 +838,6 @@ project(':iceberg-arrow') {
   }
 }
 
-project(':iceberg-pig') {
-  test {
-    useJUnitPlatform()
-  }
-
-  dependencies {
-    implementation project(path: ':iceberg-bundled-guava', configuration: 'shadow')
-    api project(':iceberg-api')
-    implementation project(':iceberg-common')
-    implementation project(':iceberg-core')
-    implementation project(':iceberg-parquet')
-
-    implementation(libs.parquet.avro) {
-      exclude group: 'org.apache.avro', module: 'avro'
-      // already shaded by Parquet
-      exclude group: 'it.unimi.dsi'
-      exclude group: 'org.codehaus.jackson'
-    }
-
-    compileOnly(libs.pig) {
-      exclude group: "junit", module: "junit"
-    }
-    compileOnly(libs.hadoop2.mapreduce.client.core)
-    compileOnly(libs.hadoop2.client) {
-      exclude group: 'org.apache.avro', module: 'avro'
-    }
-
-    testImplementation(libs.hadoop2.minicluster) {
-      exclude group: 'org.apache.avro', module: 'avro'
-    }
-  }
-}
-
 project(':iceberg-nessie') {
   test {
     useJUnitPlatform()
1 change: 0 additions & 1 deletion docs/docs/api.md
@@ -251,6 +251,5 @@ This project Iceberg also has modules for adding Iceberg support to processing engines:
 * `iceberg-mr` is an implementation of MapReduce and Hive InputFormats and SerDes for Iceberg (use iceberg-hive-runtime for a shaded version for use with Hive)
 * `iceberg-nessie` is a module used to integrate Iceberg table metadata history and operations with [Project Nessie](https://projectnessie.org/)
 * `iceberg-data` is a client library used to read Iceberg tables from JVM applications
-* `iceberg-pig` is an implementation of Pig's LoadFunc API for Iceberg
 * `iceberg-runtime` generates a shaded runtime jar for Spark to integrate with iceberg tables
 
2 changes: 0 additions & 2 deletions gradle/libs.versions.toml
@@ -76,7 +76,6 @@ netty-buffer-compat = "4.1.114.Final"
 object-client-bundle = "3.3.2"
 orc = "1.9.4"
 parquet = "1.13.1"
-pig = "0.17.0"
 roaringbitmap = "1.3.0"
 scala-collection-compat = "2.12.0"
 slf4j = "2.0.16"
@@ -169,7+168,6 @@ orc-core = { module = "org.apache.orc:orc-core", version.ref = "orc" }
 parquet-avro = { module = "org.apache.parquet:parquet-avro", version.ref = "parquet" }
 parquet-column = { module = "org.apache.parquet:parquet-column", version.ref = "parquet" }
 parquet-hadoop = { module = "org.apache.parquet:parquet-hadoop", version.ref = "parquet" }
-pig = { module = "org.apache.pig:pig", version.ref = "pig" }
 roaringbitmap = { module = "org.roaringbitmap:RoaringBitmap", version.ref = "roaringbitmap" }
 scala-collection-compat = { module = "org.scala-lang.modules:scala-collection-compat_2.13", version.ref = "scala-collection-compat"}
 slf4j-api = { module = "org.slf4j:slf4j-api", version.ref = "slf4j" }
6 changes: 0 additions & 6 deletions mr/src/main/java/org/apache/iceberg/mr/InputFormatConfig.java
@@ -77,7 +77,6 @@ private InputFormatConfig() {}
   public static final String CATALOG_CONFIG_PREFIX = "iceberg.catalog.";
 
   public enum InMemoryDataModel {
-    PIG,
     HIVE,
     GENERIC // Default data model is of Iceberg Generics
   }
@@ -169,11 +168,6 @@ public ConfigBuilder useHiveRows() {
       return this;
     }
 
-    public ConfigBuilder usePigTuples() {
-      conf.set(IN_MEMORY_DATA_MODEL, InMemoryDataModel.PIG.name());
-      return this;
-    }
-
     /**
      * Compute platforms pass down filters to data sources. If the data source cannot apply some
      * filters, or only partially applies the filter, it will return the residual filter back. If
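With `usePigTuples()` gone, `HIVE` and `GENERIC` are the only values of `InMemoryDataModel` a job can request. A minimal sketch of the remaining builder usage; the `ConfigBuilder(Configuration)` constructor is assumed from the surrounding code, which shows the builder writing into a `conf` field:

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.iceberg.mr.InputFormatConfig;

public class DataModelConfigSketch {
  public static Configuration hiveModelConf() {
    Configuration conf = new Configuration();
    InputFormatConfig.ConfigBuilder builder = new InputFormatConfig.ConfigBuilder(conf);

    // Sets IN_MEMORY_DATA_MODEL to InMemoryDataModel.HIVE, exactly as the
    // removed usePigTuples() did for InMemoryDataModel.PIG.
    builder.useHiveRows();

    // Without an explicit call, GENERIC (Iceberg generics) is the default model.
    return builder.conf();
  }
}
```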
13 changes: 1 addition & 12 deletions mr/src/main/java/org/apache/iceberg/mr/mapreduce/IcebergInputFormat.java
@@ -166,9 +166,7 @@ private List<InputSplit> planInputSplits(
       Table serializableTable = SerializableTable.copyOf(table);
       tasksIterable.forEach(
           task -> {
-            if (applyResidual
-                && (model == InputFormatConfig.InMemoryDataModel.HIVE
-                    || model == InputFormatConfig.InMemoryDataModel.PIG)) {
+            if (applyResidual && (model == InputFormatConfig.InMemoryDataModel.HIVE)) {
               // TODO: We do not support residual evaluation for HIVE and PIG in memory data model
               // yet
               checkResiduals(task);
@@ -347,9 +345,6 @@ private CloseableIterable<T> openTask(FileScanTask currentTask, Schema readSchema) {
     @SuppressWarnings("unchecked")
     private CloseableIterable<T> open(FileScanTask currentTask, Schema readSchema) {
       switch (inMemoryDataModel) {
-        case PIG:
-          // TODO: Support Pig and Hive object models for IcebergInputFormat
-          throw new UnsupportedOperationException("Pig and Hive object models are not supported.");
         case HIVE:
           return openTask(currentTask, readSchema);
         case GENERIC:
@@ -390,7 +385,6 @@ private CloseableIterable<T> newAvroIterable(
       }
 
       switch (inMemoryDataModel) {
-        case PIG:
         case HIVE:
           // TODO implement value readers for Pig and Hive
           throw new UnsupportedOperationException(
@@ -413,8 +407,6 @@ private CloseableIterable<T> newParquetIterable(
       CloseableIterable<T> parquetIterator = null;
 
       switch (inMemoryDataModel) {
-        case PIG:
-          throw new UnsupportedOperationException("Parquet support not yet supported for Pig");
         case HIVE:
           if (HiveVersion.min(HiveVersion.HIVE_3)) {
             parquetIterator =
@@ -459,9 +451,6 @@ private CloseableIterable<T> newOrcIterable(
       CloseableIterable<T> orcIterator = null;
       // ORC does not support reuse containers yet
       switch (inMemoryDataModel) {
-        case PIG:
-          // TODO: implement value readers for Pig
-          throw new UnsupportedOperationException("ORC support not yet supported for Pig");
         case HIVE:
           if (HiveVersion.min(HiveVersion.HIVE_3)) {
             orcIterator =
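For context on the residual check that now fires only for the `HIVE` model: a residual is the part of a pushed-down filter that the scan could not fully apply, and `checkResiduals(task)` rejects tasks that still carry one rather than silently returning extra rows. A sketch of that guard, reconstructed from the error message asserted in the test change below; the actual implementation may differ:

```java
import org.apache.iceberg.CombinedScanTask;
import org.apache.iceberg.expressions.Expression;
import org.apache.iceberg.expressions.Expressions;

public class ResidualGuardSketch {
  // Fail fast when a task's filter was only partially applied, since the
  // HIVE in-memory model cannot evaluate the leftover (residual) filter.
  static void checkResiduals(CombinedScanTask task) {
    task.files()
        .forEach(
            fileScanTask -> {
              Expression residual = fileScanTask.residual();
              if (residual != null && !residual.equals(Expressions.alwaysTrue())) {
                throw new UnsupportedOperationException(
                    String.format(
                        "Filter expression %s is not completely satisfied. Additional rows "
                            + "can be returned not satisfied by the filter expression",
                        residual));
              }
            });
  }
}
```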
2 changes: 0 additions & 2 deletions mr/src/test/java/org/apache/iceberg/mr/TestIcebergInputFormats.java
@@ -223,8 +223,6 @@ public void testFailedResidualFiltering() throws Exception {
         .hasMessage(
             "Filter expression ref(name=\"id\") == 0 is not completely satisfied. Additional rows can be returned not satisfied by the filter expression");
 
-    builder.usePigTuples();
-
     assertThatThrownBy(() -> testInputFormat.create(builder.conf()))
         .isInstanceOf(UnsupportedOperationException.class)
         .hasMessage(