Merged
@@ -466,19 +466,25 @@ class ParquetCachedBatchSerializer extends GpuCachedBatchSerializer {
      cacheAttributes: Seq[Attribute],
      selectedAttributes: Seq[Attribute],
      conf: SQLConf): RDD[ColumnarBatch] = {
-    // optimize
-    val newSelectedAttributes = if (selectedAttributes.isEmpty) {
-      cacheAttributes
-    } else {
-      selectedAttributes
+    // When no columns are selected (e.g., count-only scan or
+    // cross-join side that needs only row count), return
+    // row-only batches without decoding parquet data.
+    if (selectedAttributes.isEmpty) {
+      return input.map {
+        case parquetCB: ParquetCachedBatch =>
+          new ColumnarBatch(Array.empty, parquetCB.numRows)
+        case other =>
+          throw new IllegalStateException(
+            s"Expected ParquetCachedBatch but got ${other.getClass}")
+      }
Comment on lines +469 to +479

Copilot AI Mar 20, 2026

The zero-column fast-path mapping (selectedAttributes.isEmpty -> map ParquetCachedBatch to new ColumnarBatch(Array.empty, numRows)) is duplicated here and again in convertCachedBatchToColumnarBatch. Consider extracting a small private helper to keep behavior/exception text consistent and reduce the chance of one path diverging in future edits.
Collaborator Author
The two call sites differ in return type semantics (gpuConvertCachedBatchToColumnarBatch returns GPU-resident batches, convertCachedBatchToColumnarBatch returns host batches) so a shared helper would need to paper over that distinction. Given the logic is just new ColumnarBatch(Array.empty, numRows), the duplication is minimal and a helper would add more abstraction than it saves.
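Purely for illustration, the shared helper Copilot proposed might look like the stand-alone sketch below. `ParquetCachedBatch` and `ColumnarBatch` here are simplified stand-ins for the real Spark/RAPIDS types, and `rowOnlyBatch` is a hypothetical name; this is a sketch of the idea the author declined, not code from the PR.

```scala
// Simplified stand-ins for the Spark/RAPIDS types; not the real API.
case class ParquetCachedBatch(numRows: Int)
class ColumnarBatch(val columns: Array[AnyRef], val numRows: Int)

object ZeroColumnFastPath {
  // The hypothetical shared helper: map any cached batch to a
  // row-count-only batch, keeping one copy of the exception text.
  def rowOnlyBatch(batch: Any): ColumnarBatch = batch match {
    case p: ParquetCachedBatch =>
      new ColumnarBatch(Array.empty, p.numRows)
    case other =>
      throw new IllegalStateException(
        s"Expected ParquetCachedBatch but got ${other.getClass}")
  }
}
```

Each call site would then be `input.map(rowOnlyBatch)`, at the cost of the extra indirection the author argues against.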

     }
     val (cachedSchemaWithNames, selectedSchemaWithNames) =
-      getSupportedSchemaFromUnsupported(cacheAttributes, newSelectedAttributes)
+      getSupportedSchemaFromUnsupported(cacheAttributes, selectedAttributes)
     convertCachedBatchToColumnarInternal(
       input,
       cachedSchemaWithNames,
       selectedSchemaWithNames,
-      newSelectedAttributes)
+      selectedAttributes)
   }

   private def convertCachedBatchToColumnarInternal(
@@ -563,19 +569,23 @@ class ParquetCachedBatchSerializer extends GpuCachedBatchSerializer {
      cacheAttributes: Seq[Attribute],
      selectedAttributes: Seq[Attribute],
      conf: SQLConf): RDD[ColumnarBatch] = {
-    // optimize
-    val newSelectedAttributes = if (selectedAttributes.isEmpty) {
-      cacheAttributes
-    } else {
-      selectedAttributes
+    // When no columns are selected, return row-only batches
+    if (selectedAttributes.isEmpty) {
+      return input.map {
+        case parquetCB: ParquetCachedBatch =>
+          new ColumnarBatch(Array.empty, parquetCB.numRows)
+        case other =>
+          throw new IllegalStateException(
+            s"Expected ParquetCachedBatch but got ${other.getClass}")
+      }
     }
Comment on lines +573 to 581

Contributor

P2 Inconsistency with CloseableColumnBatchIterator wrapping

The non-empty paths in convertCachedBatchToColumnarBatch both wrap their results in CloseableColumnBatchIterator (GPU path at line 592, CPU path via CachedBatchIteratorConsumer), but the new zero-column early-return does not. While this is functionally safe — ColumnarBatch(Array.empty, n) holds no closeable column vector resources — it is a structural inconsistency. Consider wrapping for uniformity:

Suggested change
-    if (selectedAttributes.isEmpty) {
-      return input.map {
-        case parquetCB: ParquetCachedBatch =>
-          new ColumnarBatch(Array.empty, parquetCB.numRows)
-        case other =>
-          throw new IllegalStateException(
-            s"Expected ParquetCachedBatch but got ${other.getClass}")
-      }
-    }
+    // When no columns are selected, return row-only batches
+    if (selectedAttributes.isEmpty) {
+      return input.mapPartitions { cbIter =>
+        CloseableColumnBatchIterator(cbIter.map {
+          case parquetCB: ParquetCachedBatch =>
+            new ColumnarBatch(Array.empty, parquetCB.numRows)
+          case other =>
+            throw new IllegalStateException(
+              s"Expected ParquetCachedBatch but got ${other.getClass}")
+        })
+      }
+    }

The same note applies to the analogous block in gpuConvertCachedBatchToColumnarBatch (lines 472–479).

Note: If this suggestion doesn't match your team's coding style, reply to this and let me know. I'll remember it for next time!

Collaborator Author
As you noted, this is functionally safe — the empty ColumnarBatch holds no closeable resources, so wrapping it in CloseableColumnBatchIterator would be a no-op. Keeping the early return simple makes the intent clearer: no columns → no decoding, just row count.
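As a stand-alone illustration of that point (stub types only; `FakeVector`, `FakeBatch`, and `countRows` are invented here and are not the real `CloseableColumnBatchIterator` machinery), closing a zero-column batch releases nothing, so a closing wrapper would be a pure no-op:

```scala
// Stub types modeling the discussion; not the Spark/RAPIDS API.
class FakeVector { var closed = false; def close(): Unit = closed = true }
class FakeBatch(val columns: Array[FakeVector], val numRows: Int) {
  // close() releases each column vector; with zero columns it does nothing
  def close(): Unit = columns.foreach(_.close())
}

object RowOnlyDemo {
  // Drain batches the way a closing iterator would: close each batch
  // after use, summing row counts (a count-only scan).
  def countRows(batches: Iterator[FakeBatch]): Int =
    batches.map { b => try b.numRows finally b.close() }.sum
}
```

With zero-column batches, `close()` iterates an empty array, which matches the decision to keep the early return unwrapped.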

     val rapidsConf = new RapidsConf(conf)
     val (cachedSchemaWithNames, selectedSchemaWithNames) =
-      getSupportedSchemaFromUnsupported(cacheAttributes, newSelectedAttributes)
+      getSupportedSchemaFromUnsupported(cacheAttributes, selectedAttributes)
     if (rapidsConf.isSqlEnabled && rapidsConf.isSqlExecuteOnGPU &&
         isSchemaSupportedByCudf(cachedSchemaWithNames)) {
       val batches = convertCachedBatchToColumnarInternal(input, cachedSchemaWithNames,
-        selectedSchemaWithNames, newSelectedAttributes)
+        selectedSchemaWithNames, selectedAttributes)
       val cbRdd = batches.map(batch => {
         withResource(batch) { gpuBatch =>
           val cols = GpuColumnVector.extractColumns(gpuBatch)
@@ -585,7 +595,7 @@ class ParquetCachedBatchSerializer extends GpuCachedBatchSerializer {
       cbRdd.mapPartitions(iter => CloseableColumnBatchIterator(iter))
     } else {
       val origSelectedAttributesWithUnambiguousNames =
-        sanitizeColumnNames(newSelectedAttributes, selectedSchemaWithNames)
+        sanitizeColumnNames(selectedAttributes, selectedSchemaWithNames)
       val broadcastedConf = SparkSession.active.sparkContext.broadcast(conf.getAllConfs)
       input.mapPartitions {
         cbIter => {
@@ -216,7 +216,6 @@ class RapidsTestSettings extends BackendTestSettings {
   enableSuite[RapidsMergedParquetReadSchemaSuite]
   enableSuite[RapidsGeneratorFunctionSuite]
   enableSuite[RapidsSQLQuerySuite]
-    .exclude("SPARK-6743: no columns from cache", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/14098"))
     .exclude("aggregation with codegen updates peak execution memory", WONT_FIX_ISSUE("Codegen and memory metrics not applicable for GPU"))
     .exclude("external sorting updates peak execution memory", WONT_FIX_ISSUE("Memory metrics implementation differs on GPU"))
     .exclude("run sql directly on files", ADJUST_UT("Replaced by testRapids version that expects \"Path does not exist\" instead of \"Hive built-in ORC data source must be used with Hive support\" because there's a spark-hive jar in the CLASSPATH in our UT running"))