From ded16326bf909782200bba8ff16df34471b14ea4 Mon Sep 17 00:00:00 2001 From: Jack Ye Date: Tue, 9 Dec 2025 16:01:27 -0800 Subject: [PATCH] feat: support count optimization with indexed columns --- .../spark/internal/LanceDatasetAdapter.java | 98 ++++++++++ .../org/lance/spark/read/FilterPushDown.java | 47 +++++ .../LanceIndexedCountPartitionReader.java | 104 ++++++++++ .../lance/spark/read/LanceScanBuilder.java | 60 ++++++ .../lance/spark/read/LanceSplitCountScan.java | 183 ++++++++++++++++++ .../LanceUnindexedCountPartitionReader.java | 128 ++++++++++++ .../java/org/lance/spark/utils/Optional.java | 4 + .../BaseSparkConnectorAggPushdownTest.java | 125 ++++++++++++ pom.xml | 2 +- 9 files changed, 750 insertions(+), 1 deletion(-) create mode 100644 lance-spark-base_2.12/src/main/java/org/lance/spark/read/LanceIndexedCountPartitionReader.java create mode 100644 lance-spark-base_2.12/src/main/java/org/lance/spark/read/LanceSplitCountScan.java create mode 100644 lance-spark-base_2.12/src/main/java/org/lance/spark/read/LanceUnindexedCountPartitionReader.java diff --git a/lance-spark-base_2.12/src/main/java/org/lance/spark/internal/LanceDatasetAdapter.java b/lance-spark-base_2.12/src/main/java/org/lance/spark/internal/LanceDatasetAdapter.java index e6987384..b3db473d 100644 --- a/lance-spark-base_2.12/src/main/java/org/lance/spark/internal/LanceDatasetAdapter.java +++ b/lance-spark-base_2.12/src/main/java/org/lance/spark/internal/LanceDatasetAdapter.java @@ -27,8 +27,12 @@ import org.lance.compaction.CompactionTask; import org.lance.compaction.RewriteResult; import org.lance.fragment.FragmentMergeResult; +import org.lance.index.Index; +import org.lance.index.IndexType; import org.lance.operation.Merge; import org.lance.operation.Update; +import org.lance.schema.LanceField; +import org.lance.schema.LanceSchema; import org.lance.spark.LanceConfig; import org.lance.spark.SparkOptions; import org.lance.spark.read.LanceInputPartition; @@ -45,7 +49,10 @@ import 
org.apache.spark.sql.util.LanceArrowUtils; import java.time.ZoneId; +import java.util.ArrayList; +import java.util.HashSet; import java.util.List; +import java.util.Set; import java.util.stream.Collectors; public class LanceDatasetAdapter { @@ -293,4 +300,95 @@ public static void dropDataset(LanceConfig config) { ReadOptions options = SparkOptions.genReadOptionFromConfig(config); Dataset.drop(uri, options.getStorageOptions()); } + + /** + * Get all indexes from the dataset. + * + * @param dataset the opened Dataset + * @return list of Index objects with full metadata + */ + public static List<Index> getIndexes(Dataset dataset) { + return dataset.getIndexes(); + } + + /** + * Get indexes that cover the specified column. + * + * @param dataset the opened Dataset + * @param columnName the name of the column + * @return list of indexes covering the column + */ + public static List<Index> getIndexesForColumn(Dataset dataset, String columnName) { + LanceSchema lanceSchema = dataset.getLanceSchema(); + Integer fieldId = findFieldIdByName(lanceSchema, columnName); + if (fieldId == null) { + return new ArrayList<>(); + } + + List<Index> allIndexes = dataset.getIndexes(); + return allIndexes.stream() + .filter(index -> index.fields().contains(fieldId)) + .collect(Collectors.toList()); + } + + private static Integer findFieldIdByName(LanceSchema schema, String columnName) { + for (LanceField field : schema.fields()) { + if (field.getName().equals(columnName)) { + return field.getId(); + } + } + return null; + } + + /** + * Check if an index is an exact index (BTREE or BITMAP). Exact indexes can be used for precise + * counting without scanning. + * + * @param index the index to check + * @return true if the index is an exact index + */ + public static boolean isExactIndex(Index index) { + IndexType type = index.indexType(); + return type == IndexType.BTREE || type == IndexType.BITMAP; + } + + /** + * Get fragment IDs not covered by an index.
+ * + * @param dataset the opened Dataset + * @param index the index to check coverage for + * @return list of fragment IDs not covered by the index + */ + public static List<Integer> getUnindexedFragments(Dataset dataset, Index index) { + Set<Integer> allFragments = new HashSet<>(getFragmentIds(dataset)); + index.fragments().ifPresent(allFragments::removeAll); + return new ArrayList<>(allFragments); + } + + /** + * Get fragment IDs covered by an index. + * + * @param index the index + * @return list of fragment IDs covered by the index, or empty if unknown + */ + public static List<Integer> getIndexedFragments(Index index) { + return index.fragments().orElse(new ArrayList<>()); + } + + /** + * Count rows using a specific index. + * + * @param dataset the opened Dataset + * @param indexName the name of the index + * @param filter the filter expression + * @param fragmentIds optional list of fragment IDs to restrict counting to + * @return the count of matching rows + */ + public static long countIndexedRows( + Dataset dataset, + String indexName, + String filter, + java.util.Optional<List<Integer>> fragmentIds) { + return dataset.countIndexedRows(indexName, filter, fragmentIds); + } } diff --git a/lance-spark-base_2.12/src/main/java/org/lance/spark/read/FilterPushDown.java b/lance-spark-base_2.12/src/main/java/org/lance/spark/read/FilterPushDown.java index cff5325e..cd18f0e4 100644 --- a/lance-spark-base_2.12/src/main/java/org/lance/spark/read/FilterPushDown.java +++ b/lance-spark-base_2.12/src/main/java/org/lance/spark/read/FilterPushDown.java @@ -36,7 +36,9 @@ import java.sql.Timestamp; import java.util.ArrayList; import java.util.Arrays; +import java.util.HashSet; import java.util.List; +import java.util.Set; import java.util.stream.Collectors; public class FilterPushDown { @@ -192,4 +194,49 @@ private static String compileValue(Object value) { return value.toString(); } } + + /** + * Extract column names from filters. Returns columns that are directly compared (EqualTo, + * GreaterThan, etc.).
+ * + * @param filters the filters to extract columns from + * @return set of column names used in the filters + */ + public static Set<String> extractFilterColumns(Filter[] filters) { + Set<String> columns = new HashSet<>(); + for (Filter filter : filters) { + extractColumnsFromFilter(filter, columns); + } + return columns; + } + + private static void extractColumnsFromFilter(Filter filter, Set<String> columns) { + if (filter instanceof EqualTo) { + columns.add(((EqualTo) filter).attribute()); + } else if (filter instanceof GreaterThan) { + columns.add(((GreaterThan) filter).attribute()); + } else if (filter instanceof GreaterThanOrEqual) { + columns.add(((GreaterThanOrEqual) filter).attribute()); + } else if (filter instanceof LessThan) { + columns.add(((LessThan) filter).attribute()); + } else if (filter instanceof LessThanOrEqual) { + columns.add(((LessThanOrEqual) filter).attribute()); + } else if (filter instanceof In) { + columns.add(((In) filter).attribute()); + } else if (filter instanceof IsNull) { + columns.add(((IsNull) filter).attribute()); + } else if (filter instanceof IsNotNull) { + columns.add(((IsNotNull) filter).attribute()); + } else if (filter instanceof And) { + And f = (And) filter; + extractColumnsFromFilter(f.left(), columns); + extractColumnsFromFilter(f.right(), columns); + } else if (filter instanceof Or) { + Or f = (Or) filter; + extractColumnsFromFilter(f.left(), columns); + extractColumnsFromFilter(f.right(), columns); + } else if (filter instanceof Not) { + extractColumnsFromFilter(((Not) filter).child(), columns); + } + } } diff --git a/lance-spark-base_2.12/src/main/java/org/lance/spark/read/LanceIndexedCountPartitionReader.java b/lance-spark-base_2.12/src/main/java/org/lance/spark/read/LanceIndexedCountPartitionReader.java new file mode 100644 index 00000000..92963c4b --- /dev/null +++ b/lance-spark-base_2.12/src/main/java/org/lance/spark/read/LanceIndexedCountPartitionReader.java @@ -0,0 +1,104 @@ +/* + * Licensed under the Apache License, Version 2.0
(the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.lance.spark.read; + +import org.lance.Dataset; +import org.lance.ReadOptions; +import org.lance.spark.SparkOptions; +import org.lance.spark.internal.LanceDatasetAdapter; + +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.vector.BigIntVector; +import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.spark.sql.connector.read.PartitionReader; +import org.apache.spark.sql.types.DataTypes; +import org.apache.spark.sql.types.StructType; +import org.apache.spark.sql.util.LanceArrowUtils; +import org.apache.spark.sql.vectorized.ColumnarBatch; +import org.apache.spark.sql.vectorized.LanceArrowColumnVector; + +import java.io.IOException; +import java.util.Optional; + +/** + * Partition reader that counts rows using direct index queries. All indexed fragments are processed + * in a single reader using the scalar index. 
*/ +public class LanceIndexedCountPartitionReader implements PartitionReader<ColumnarBatch> { + private final LanceSplitCountScan.LanceIndexedCountPartition partition; + private final BufferAllocator allocator; + private boolean finished = false; + private ColumnarBatch currentBatch; + + public LanceIndexedCountPartitionReader( + LanceSplitCountScan.LanceIndexedCountPartition partition) { + this.partition = partition; + this.allocator = LanceDatasetAdapter.allocator; + } + + @Override + public boolean next() throws IOException { + if (!finished) { + finished = true; + return true; + } + return false; + } + + private long computeIndexedCount() { + String uri = partition.getConfig().getDatasetUri(); + ReadOptions options = SparkOptions.genReadOptionFromConfig(partition.getConfig()); + + try (Dataset dataset = Dataset.open(allocator, uri, options)) { + String filter = partition.getWhereCondition().orElse(""); + Optional<java.util.List<Integer>> fragmentIds = Optional.of(partition.getFragmentIds()); + + return LanceDatasetAdapter.countIndexedRows( + dataset, partition.getIndexName(), filter, fragmentIds); + } + } + + private ColumnarBatch createCountResultBatch(long count) { + StructType countSchema = new StructType().add("count", DataTypes.LongType); + VectorSchemaRoot root = + VectorSchemaRoot.create( + LanceArrowUtils.toArrowSchema(countSchema, "UTC", false, false), allocator); + + root.allocateNew(); + BigIntVector countVector = (BigIntVector) root.getVector("count"); + countVector.setSafe(0, count); + root.setRowCount(1); + + LanceArrowColumnVector[] columns = + root.getFieldVectors().stream() + .map(LanceArrowColumnVector::new) + .toArray(LanceArrowColumnVector[]::new); + + return new ColumnarBatch(columns, 1); + } + + @Override + public ColumnarBatch get() { + long count = computeIndexedCount(); + currentBatch = createCountResultBatch(count); + return currentBatch; + } + + @Override + public void close() throws IOException { + if (currentBatch != null) { + currentBatch.close(); + } + } +} diff
--git a/lance-spark-base_2.12/src/main/java/org/lance/spark/read/LanceScanBuilder.java b/lance-spark-base_2.12/src/main/java/org/lance/spark/read/LanceScanBuilder.java index 08e6e3aa..0d44bf69 100644 --- a/lance-spark-base_2.12/src/main/java/org/lance/spark/read/LanceScanBuilder.java +++ b/lance-spark-base_2.12/src/main/java/org/lance/spark/read/LanceScanBuilder.java @@ -14,6 +14,7 @@ package org.lance.spark.read; import org.lance.Dataset; +import org.lance.index.Index; import org.lance.ipc.ColumnOrdering; import org.lance.spark.LanceConfig; import org.lance.spark.SparkOptions; @@ -43,7 +44,10 @@ import org.apache.spark.sql.types.StructType; import java.util.ArrayList; +import java.util.HashSet; import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; public class LanceScanBuilder implements SupportsPushDownRequiredColumns, @@ -61,6 +65,7 @@ public class LanceScanBuilder private Optional<List<ColumnOrdering>> topNSortOrders = Optional.empty(); private Optional<Aggregation> pushedAggregation = Optional.empty(); private LanceLocalScan localScan = null; + private LanceSplitCountScan splitCountScan = null; // Lazily opened dataset for reuse during scan building private Dataset lazyDataset = null; @@ -98,6 +103,10 @@ public Scan build() { if (localScan != null) { return localScan; } + // Return SplitCountScan if we have partially indexed fragments + if (splitCountScan != null) { + return splitCountScan; + } Optional<String> whereCondition = FilterPushDown.compileFiltersToSqlWhereClause(pushedFilters); return new LanceScan( schema, config, whereCondition, limit, offset, topNSortOrders, pushedAggregation); @@ -204,7 +213,58 @@ public boolean pushAggregation(Aggregation aggregation) { this.localScan = new LanceLocalScan(countSchema, rows, config.getDatasetUri()); return true; } + } else { + // Check for indexed column optimization when filters are present + Set<String> filterColumns = FilterPushDown.extractFilterColumns(pushedFilters); + Dataset dataset = getOrOpenDataset(); + List<Integer> allFragments = LanceDatasetAdapter.getFragmentIds(dataset); + + for (String column : filterColumns) { + List<Index> indexes = LanceDatasetAdapter.getIndexesForColumn(dataset, column); + for (Index index : indexes) { + if (LanceDatasetAdapter.isExactIndex(index)) { + List<Integer> indexedFragments = LanceDatasetAdapter.getIndexedFragments(index); + + if (!indexedFragments.isEmpty()) { + Set<Integer> indexedSet = new HashSet<>(indexedFragments); + List<Integer> unindexedFragments = + allFragments.stream() + .filter(f -> !indexedSet.contains(f)) + .collect(Collectors.toList()); + + Optional<String> whereCondition = + FilterPushDown.compileFiltersToSqlWhereClause(pushedFilters); + + if (unindexedFragments.isEmpty()) { + // All fragments indexed - use local scan with index count + long count = + LanceDatasetAdapter.countIndexedRows( + dataset, + index.name(), + whereCondition.orElse(""), + java.util.Optional.empty()); + StructType countSchema = new StructType().add("count", DataTypes.LongType); + InternalRow[] rows = new InternalRow[1]; + rows[0] = new GenericInternalRow(new Object[] {count}); + this.localScan = new LanceLocalScan(countSchema, rows, config.getDatasetUri()); + return true; + } else { + // Mixed - use split scan + this.splitCountScan = + new LanceSplitCountScan( + new ArrayList<>(indexedSet), + unindexedFragments, + index.name(), + whereCondition, + config); + return true; + } + } + } + } + } } + // Fall back to scan-based count (with filters or metadata unavailable) this.pushedAggregation = Optional.of(aggregation); return true; diff --git a/lance-spark-base_2.12/src/main/java/org/lance/spark/read/LanceSplitCountScan.java b/lance-spark-base_2.12/src/main/java/org/lance/spark/read/LanceSplitCountScan.java new file mode 100644 index 00000000..9fcf64bd --- /dev/null +++ b/lance-spark-base_2.12/src/main/java/org/lance/spark/read/LanceSplitCountScan.java @@ -0,0 +1,183 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.lance.spark.read; + +import org.lance.spark.LanceConfig; +import org.lance.spark.utils.Optional; + +import org.apache.spark.sql.catalyst.InternalRow; +import org.apache.spark.sql.connector.read.Batch; +import org.apache.spark.sql.connector.read.InputPartition; +import org.apache.spark.sql.connector.read.PartitionReader; +import org.apache.spark.sql.connector.read.PartitionReaderFactory; +import org.apache.spark.sql.connector.read.Scan; +import org.apache.spark.sql.types.DataTypes; +import org.apache.spark.sql.types.StructType; +import org.apache.spark.sql.vectorized.ColumnarBatch; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.List; + +/** + * A scan that combines indexed and unindexed fragments for count operations. Indexed fragments are + * processed in a single partition using direct index queries, while unindexed fragments are + * distributed normally (one partition per fragment). 
*/ +public class LanceSplitCountScan implements Batch, Scan, Serializable { + private static final long serialVersionUID = 3847293847293847294L; + + private final List<Integer> indexedFragmentIds; + private final List<Integer> unindexedFragmentIds; + private final String indexName; + private final Optional<String> whereCondition; + private final LanceConfig config; + + public LanceSplitCountScan( + List<Integer> indexedFragmentIds, + List<Integer> unindexedFragmentIds, + String indexName, + Optional<String> whereCondition, + LanceConfig config) { + this.indexedFragmentIds = indexedFragmentIds; + this.unindexedFragmentIds = unindexedFragmentIds; + this.indexName = indexName; + this.whereCondition = whereCondition; + this.config = config; + } + + @Override + public Batch toBatch() { + return this; + } + + @Override + public InputPartition[] planInputPartitions() { + List<InputPartition> partitions = new ArrayList<>(); + + // First partition: all indexed fragments (single worker with index query) + if (!indexedFragmentIds.isEmpty()) { + partitions.add( + new LanceIndexedCountPartition(indexedFragmentIds, indexName, whereCondition, config)); + } + + // Remaining partitions: unindexed fragments (distributed normally) + for (Integer fragmentId : unindexedFragmentIds) { + List<Integer> singleFragment = new ArrayList<>(); + singleFragment.add(fragmentId); + partitions.add(new LanceUnindexedCountPartition(singleFragment, whereCondition, config)); + } + + return partitions.toArray(new InputPartition[0]); + } + + @Override + public PartitionReaderFactory createReaderFactory() { + return new SplitCountReaderFactory(); + } + + @Override + public StructType readSchema() { + return new StructType().add("count", DataTypes.LongType); + } + + /** Partition for indexed fragments that will use direct index query.
*/ public static class LanceIndexedCountPartition implements InputPartition, Serializable { + private static final long serialVersionUID = 4723894723984723985L; + + private final List<Integer> fragmentIds; + private final String indexName; + private final Optional<String> whereCondition; + private final LanceConfig config; + + public LanceIndexedCountPartition( + List<Integer> fragmentIds, + String indexName, + Optional<String> whereCondition, + LanceConfig config) { + this.fragmentIds = fragmentIds; + this.indexName = indexName; + this.whereCondition = whereCondition; + this.config = config; + } + + public List<Integer> getFragmentIds() { + return fragmentIds; + } + + public String getIndexName() { + return indexName; + } + + public Optional<String> getWhereCondition() { + return whereCondition; + } + + public LanceConfig getConfig() { + return config; + } + } + + /** Partition for unindexed fragments that will use normal scan-based count. */ + public static class LanceUnindexedCountPartition implements InputPartition, Serializable { + private static final long serialVersionUID = 4723894723984723986L; + + private final List<Integer> fragmentIds; + private final Optional<String> whereCondition; + private final LanceConfig config; + + public LanceUnindexedCountPartition( + List<Integer> fragmentIds, Optional<String> whereCondition, LanceConfig config) { + this.fragmentIds = fragmentIds; + this.whereCondition = whereCondition; + this.config = config; + } + + public List<Integer> getFragmentIds() { + return fragmentIds; + } + + public Optional<String> getWhereCondition() { + return whereCondition; + } + + public LanceConfig getConfig() { + return config; + } + } + + private static class SplitCountReaderFactory implements PartitionReaderFactory { + @Override + public PartitionReader<InternalRow> createReader(InputPartition partition) { + throw new UnsupportedOperationException("Row-based reads not supported for split count scan"); + } + + @Override + public PartitionReader<ColumnarBatch> createColumnarReader(InputPartition partition) { + if (partition instanceof LanceIndexedCountPartition) {
return new LanceIndexedCountPartitionReader((LanceIndexedCountPartition) partition); + } else if (partition instanceof LanceUnindexedCountPartition) { + return new LanceUnindexedCountPartitionReader((LanceUnindexedCountPartition) partition); + } else { + throw new IllegalArgumentException( + "Unknown partition type: " + partition.getClass().getName()); + } + } + + @Override + public boolean supportColumnarReads(InputPartition partition) { + return true; + } + } +} diff --git a/lance-spark-base_2.12/src/main/java/org/lance/spark/read/LanceUnindexedCountPartitionReader.java b/lance-spark-base_2.12/src/main/java/org/lance/spark/read/LanceUnindexedCountPartitionReader.java new file mode 100644 index 00000000..6b6448f2 --- /dev/null +++ b/lance-spark-base_2.12/src/main/java/org/lance/spark/read/LanceUnindexedCountPartitionReader.java @@ -0,0 +1,128 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.lance.spark.read; + +import org.lance.Dataset; +import org.lance.ReadOptions; +import org.lance.ipc.LanceScanner; +import org.lance.ipc.ScanOptions; +import org.lance.spark.SparkOptions; +import org.lance.spark.internal.LanceDatasetAdapter; + +import com.google.common.collect.Lists; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.vector.BigIntVector; +import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.ipc.ArrowReader; +import org.apache.spark.sql.connector.read.PartitionReader; +import org.apache.spark.sql.types.DataTypes; +import org.apache.spark.sql.types.StructType; +import org.apache.spark.sql.util.LanceArrowUtils; +import org.apache.spark.sql.vectorized.ColumnarBatch; +import org.apache.spark.sql.vectorized.LanceArrowColumnVector; + +import java.io.IOException; +import java.util.List; + +/** + * Partition reader for unindexed fragments that uses normal scan-based counting. Each partition + * contains a subset of fragments to count. 
*/ +public class LanceUnindexedCountPartitionReader implements PartitionReader<ColumnarBatch> { + private final LanceSplitCountScan.LanceUnindexedCountPartition partition; + private final BufferAllocator allocator; + private boolean finished = false; + private ColumnarBatch currentBatch; + + public LanceUnindexedCountPartitionReader( + LanceSplitCountScan.LanceUnindexedCountPartition partition) { + this.partition = partition; + this.allocator = LanceDatasetAdapter.allocator; + } + + @Override + public boolean next() throws IOException { + if (!finished) { + finished = true; + return true; + } + return false; + } + + private long computeCount() { + String uri = partition.getConfig().getDatasetUri(); + ReadOptions options = SparkOptions.genReadOptionFromConfig(partition.getConfig()); + long totalCount = 0; + + try (Dataset dataset = Dataset.open(allocator, uri, options)) { + List<Integer> fragmentIds = partition.getFragmentIds(); + if (fragmentIds.isEmpty()) { + return 0; + } + + ScanOptions.Builder scanOptionsBuilder = new ScanOptions.Builder(); + if (partition.getWhereCondition().isPresent()) { + scanOptionsBuilder.filter(partition.getWhereCondition().get()); + } + scanOptionsBuilder.withRowId(true); + scanOptionsBuilder.columns(Lists.newArrayList()); + scanOptionsBuilder.fragmentIds(fragmentIds); + + try (LanceScanner scanner = dataset.newScan(scanOptionsBuilder.build())) { + try (ArrowReader reader = scanner.scanBatches()) { + while (reader.loadNextBatch()) { + totalCount += reader.getVectorSchemaRoot().getRowCount(); + } + } + } catch (Exception e) { + throw new RuntimeException("Failed to scan fragments " + fragmentIds, e); + } + } + + return totalCount; + } + + private ColumnarBatch createCountResultBatch(long count) { + StructType countSchema = new StructType().add("count", DataTypes.LongType); + VectorSchemaRoot root = + VectorSchemaRoot.create( + LanceArrowUtils.toArrowSchema(countSchema, "UTC", false, false), allocator); + + root.allocateNew(); + BigIntVector countVector =
(BigIntVector) root.getVector("count"); + countVector.setSafe(0, count); + root.setRowCount(1); + + LanceArrowColumnVector[] columns = + root.getFieldVectors().stream() + .map(LanceArrowColumnVector::new) + .toArray(LanceArrowColumnVector[]::new); + + return new ColumnarBatch(columns, 1); + } + + @Override + public ColumnarBatch get() { + long count = computeCount(); + currentBatch = createCountResultBatch(count); + return currentBatch; + } + + @Override + public void close() throws IOException { + if (currentBatch != null) { + currentBatch.close(); + } + } +} diff --git a/lance-spark-base_2.12/src/main/java/org/lance/spark/utils/Optional.java b/lance-spark-base_2.12/src/main/java/org/lance/spark/utils/Optional.java index 62554dc0..a0d8ecd1 100644 --- a/lance-spark-base_2.12/src/main/java/org/lance/spark/utils/Optional.java +++ b/lance-spark-base_2.12/src/main/java/org/lance/spark/utils/Optional.java @@ -63,6 +63,10 @@ public void ifPresent(Consumer action) { } } + public T orElse(T other) { + return value != null ? value : other; + } + @Override public String toString() { return value != null ? 
String.format("Optional[%s]", value) : "Optional.empty"; diff --git a/lance-spark-base_2.12/src/test/java/org/lance/spark/read/BaseSparkConnectorAggPushdownTest.java b/lance-spark-base_2.12/src/test/java/org/lance/spark/read/BaseSparkConnectorAggPushdownTest.java index 22e2285f..796c769c 100644 --- a/lance-spark-base_2.12/src/test/java/org/lance/spark/read/BaseSparkConnectorAggPushdownTest.java +++ b/lance-spark-base_2.12/src/test/java/org/lance/spark/read/BaseSparkConnectorAggPushdownTest.java @@ -13,6 +13,13 @@ */ package org.lance.spark.read; +import org.lance.ReadOptions; +import org.lance.index.IndexParams; +import org.lance.index.IndexType; +import org.lance.index.scalar.ScalarIndexParams; +import org.lance.spark.internal.LanceDatasetAdapter; + +import org.apache.arrow.memory.BufferAllocator; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; import org.apache.spark.sql.RowFactory; @@ -25,8 +32,11 @@ import java.nio.file.Path; import java.util.Arrays; +import java.util.Collections; +import java.util.Optional; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertTrue; public abstract class BaseSparkConnectorAggPushdownTest { @@ -216,8 +226,123 @@ public void testCountStarWithFilterUsesBatchScan() throws Exception { plan.contains("BatchScan") || plan.contains("LanceScan"), "COUNT(*) with filter should use BatchScan. Plan: " + plan); + // Verify LocalTableScan is NOT used (that's for metadata-only counts) + assertFalse( + plan.contains("LocalTableScan"), + "COUNT(*) with filter should NOT use LocalTableScan. Plan: " + plan); + + // Verify SplitCountScan is NOT used (no index exists) + assertFalse( + plan.contains("LanceSplitCountScan"), + "COUNT(*) with filter but no index should NOT use LanceSplitCountScan. 
Plan: " + plan); + // Verify the count is correct (ids 11 to 49 = 39 rows) long count = countDataset.first().getLong(0); assertEquals(39L, count, "Filtered count should be 39"); } + + @Test + public void testCountStarWithIndexedColumnUsesLocalScan() throws Exception { + String tableName = "lance.default.count_indexed_local_scan_test"; + String datasetPath = tempDir.resolve("count_indexed_local_scan_test.lance").toString(); + + // Create dataset with multiple fragments + spark.range(0, 100).toDF("id").repartition(4).writeTo(tableName).create(); + + // Create a BTREE index on the 'id' column using Lance Java SDK + BufferAllocator allocator = LanceDatasetAdapter.allocator; + try (org.lance.Dataset lanceDataset = + org.lance.Dataset.open(allocator, datasetPath, new ReadOptions.Builder().build())) { + + ScalarIndexParams scalarParams = ScalarIndexParams.create("btree", "{}"); + IndexParams indexParams = IndexParams.builder().setScalarIndexParams(scalarParams).build(); + + lanceDataset.createIndex( + Collections.singletonList("id"), + IndexType.BTREE, + Optional.of("id_btree_index"), + indexParams, + true); + + // Verify index was created + assertTrue(lanceDataset.listIndexes().contains("id_btree_index"), "Index should be created"); + } + + // Refresh the table to pick up the new index + spark.catalog().refreshTable(tableName); + + // Query with filter on indexed column + Dataset lanceDataset = spark.table(tableName); + Dataset countDataset = lanceDataset.filter("id > 50").selectExpr("count(*)"); + + // Get the query plan as string + String plan = countDataset.queryExecution().executedPlan().toString(); + + // When all fragments are indexed, it should use LocalTableScan (direct index count) + assertTrue( + plan.contains("LocalTableScan"), + "COUNT(*) with filter on fully indexed column should use LocalTableScan. 
Plan: " + plan); + + // Verify the count is correct (ids 51 to 99 = 49 rows) + long count = countDataset.first().getLong(0); + assertEquals(49L, count, "Filtered count should be 49"); + } + + @Test + public void testCountStarWithPartialIndexUsesSplitScan() throws Exception { + String tableName = "lance.default.count_split_scan_test"; + String datasetPath = tempDir.resolve("count_split_scan_test.lance").toString(); + + // Create initial dataset with 2 fragments + spark.range(0, 50).toDF("id").repartition(2).writeTo(tableName).create(); + + // Create a BTREE index on 'id' column (covers initial fragments) + BufferAllocator allocator = LanceDatasetAdapter.allocator; + try (org.lance.Dataset lanceDataset = + org.lance.Dataset.open(allocator, datasetPath, new ReadOptions.Builder().build())) { + + ScalarIndexParams scalarParams = ScalarIndexParams.create("btree", "{}"); + IndexParams indexParams = IndexParams.builder().setScalarIndexParams(scalarParams).build(); + + lanceDataset.createIndex( + Collections.singletonList("id"), + IndexType.BTREE, + Optional.of("id_partial_index"), + indexParams, + true); + + assertTrue( + lanceDataset.listIndexes().contains("id_partial_index"), "Index should be created"); + } + + // Append more data (creates new unindexed fragments) + spark.range(50, 100).toDF("id").repartition(2).writeTo(tableName).append(); + + // Refresh the table + spark.catalog().refreshTable(tableName); + + // Verify total row count first + Dataset lanceDataset = spark.table(tableName); + long totalCount = lanceDataset.count(); + assertEquals(100L, totalCount, "Total count should be 100"); + + // Query with filter on partially indexed column + Dataset countDataset = lanceDataset.filter("id > 25").selectExpr("count(*)"); + + // Get the query plan as string + String plan = countDataset.queryExecution().executedPlan().toString(); + + // With partial index, it could use SplitCountScan (optimization) or BatchScan (fallback) + // The key is that the query should still 
return correct results + assertTrue( + plan.contains("LanceSplitCountScan") || plan.contains("BatchScan"), + "COUNT(*) with filter on partially indexed column should use SplitCountScan or BatchScan. Plan: " + + plan); + + // Verify the count is correct (ids 26 to 99 = 74 rows) + // Note: If SplitCountScan optimization is used but countIndexedRows JNI is incomplete, + // this may fail. In that case, we'd fall back to BatchScan which should be correct. + long count = countDataset.first().getLong(0); + assertEquals(74L, count, "Filtered count should be 74"); + } } diff --git a/pom.xml b/pom.xml index 31a131e2..a0b7aca5 100644 --- a/pom.xml +++ b/pom.xml @@ -51,7 +51,7 @@ 0.1.3-beta.1 - 1.0.0-rc.3 + 1.1.0-beta.2 0.2.1 4.9.3