diff --git a/google-cloud-bigtable/clirr-ignored-differences.xml b/google-cloud-bigtable/clirr-ignored-differences.xml
index 42ed3e3f7e..a5f9d8c3e6 100644
--- a/google-cloud-bigtable/clirr-ignored-differences.xml
+++ b/google-cloud-bigtable/clirr-ignored-differences.xml
@@ -426,4 +426,34 @@
*create*
*
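+  <difference>
+    <differenceType>4001</differenceType>
+    <className>com/google/cloud/bigtable/data/v2/stub/BigtableChannelPrimer</className>
+    <to>com/google/api/gax/grpc/ChannelPrimer</to>
+  </difference>
+  <difference>
+    <differenceType>4001</differenceType>
+    <className>com/google/cloud/bigtable/data/v2/stub/NoOpChannelPrimer</className>
+    <to>com/google/api/gax/grpc/ChannelPrimer</to>
+  </difference>
+  <difference>
+    <differenceType>7005</differenceType>
+    <className>com/google/cloud/bigtable/gaxx/grpc/BigtableChannelPool</className>
+    <method>*create*</method>
+    <to>*</to>
+  </difference>
+  <difference>
+    <!-- InternalApi was updated -->
+    <differenceType>7005</differenceType>
+    <className>com/google/cloud/bigtable/gaxx/grpc/BigtableTransportChannelProvider</className>
+    <method>*create*</method>
+    <to>*</to>
+  </difference>
+  <difference>
+    <!-- InternalApi was updated -->
+    <differenceType>7006</differenceType>
+    <className>com/google/cloud/bigtable/data/v2/stub/BigtableChannelPrimer</className>
+    <method>*sendPrimeRequestsAsync*</method>
+    <to>com.google.api.core.ApiFuture</to>
+  </difference>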
diff --git a/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/data/v2/stub/BigtableChannelPrimer.java b/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/data/v2/stub/BigtableChannelPrimer.java
index 4ace6c7567..97c6e364c8 100644
--- a/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/data/v2/stub/BigtableChannelPrimer.java
+++ b/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/data/v2/stub/BigtableChannelPrimer.java
@@ -15,14 +15,15 @@
*/
package com.google.cloud.bigtable.data.v2.stub;
+import com.google.api.core.ApiFuture;
import com.google.api.core.InternalApi;
import com.google.api.core.SettableApiFuture;
-import com.google.api.gax.grpc.ChannelPrimer;
import com.google.auth.Credentials;
import com.google.bigtable.v2.BigtableGrpc;
import com.google.bigtable.v2.InstanceName;
import com.google.bigtable.v2.PingAndWarmRequest;
import com.google.bigtable.v2.PingAndWarmResponse;
+import com.google.cloud.bigtable.gaxx.grpc.ChannelPrimer;
import io.grpc.CallCredentials;
import io.grpc.CallOptions;
import io.grpc.ClientCall;
@@ -110,8 +111,7 @@ private void sendPrimeRequestsBlocking(ManagedChannel managedChannel) {
}
}
- public SettableApiFuture sendPrimeRequestsAsync(
- ManagedChannel managedChannel) {
+ public ApiFuture sendPrimeRequestsAsync(ManagedChannel managedChannel) {
ClientCall clientCall =
managedChannel.newCall(
BigtableGrpc.getPingAndWarmMethod(),
diff --git a/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/data/v2/stub/BigtableClientContext.java b/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/data/v2/stub/BigtableClientContext.java
index 233294fe4e..92a984a015 100644
--- a/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/data/v2/stub/BigtableClientContext.java
+++ b/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/data/v2/stub/BigtableClientContext.java
@@ -20,7 +20,6 @@
import com.google.api.gax.core.BackgroundResource;
import com.google.api.gax.core.CredentialsProvider;
import com.google.api.gax.core.FixedCredentialsProvider;
-import com.google.api.gax.grpc.ChannelPrimer;
import com.google.api.gax.grpc.InstantiatingGrpcChannelProvider;
import com.google.api.gax.rpc.ClientContext;
import com.google.auth.Credentials;
@@ -34,6 +33,7 @@
import com.google.cloud.bigtable.data.v2.stub.metrics.MetricsProvider;
import com.google.cloud.bigtable.data.v2.stub.metrics.NoopMetricsProvider;
import com.google.cloud.bigtable.gaxx.grpc.BigtableTransportChannelProvider;
+import com.google.cloud.bigtable.gaxx.grpc.ChannelPrimer;
import io.grpc.ManagedChannelBuilder;
import io.grpc.opentelemetry.GrpcOpenTelemetry;
import io.opentelemetry.api.OpenTelemetry;
diff --git a/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/data/v2/stub/NoOpChannelPrimer.java b/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/data/v2/stub/NoOpChannelPrimer.java
index aed412fd0d..3cb98d9dee 100644
--- a/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/data/v2/stub/NoOpChannelPrimer.java
+++ b/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/data/v2/stub/NoOpChannelPrimer.java
@@ -15,8 +15,11 @@
*/
package com.google.cloud.bigtable.data.v2.stub;
+import com.google.api.core.ApiFuture;
import com.google.api.core.InternalApi;
-import com.google.api.gax.grpc.ChannelPrimer;
+import com.google.api.core.SettableApiFuture;
+import com.google.bigtable.v2.PingAndWarmResponse;
+import com.google.cloud.bigtable.gaxx.grpc.ChannelPrimer;
import io.grpc.ManagedChannel;
@InternalApi
@@ -28,7 +31,14 @@ static NoOpChannelPrimer create() {
private NoOpChannelPrimer() {}
@Override
- public void primeChannel(ManagedChannel managedChannel) {
+ public void primeChannel(ManagedChannel channel) {
// No op
}
+
+ /**
+  * Returns a future that is already completed with the default {@link PingAndWarmResponse}; no
+  * request is sent on the channel.
+  *
+  * @param channel ignored by this no-op implementation
+  * @return an already-completed future holding {@code PingAndWarmResponse.getDefaultInstance()}
+  */
+ @Override
+ public ApiFuture<PingAndWarmResponse> sendPrimeRequestsAsync(ManagedChannel channel) {
+   // Type arguments are spelled out so callers get a typed future instead of a raw one.
+   SettableApiFuture<PingAndWarmResponse> future = SettableApiFuture.create();
+   future.set(PingAndWarmResponse.getDefaultInstance());
+   return future;
+ }
}
diff --git a/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/gaxx/grpc/BigtableChannelPool.java b/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/gaxx/grpc/BigtableChannelPool.java
index da7bd4f956..c8ced11158 100644
--- a/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/gaxx/grpc/BigtableChannelPool.java
+++ b/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/gaxx/grpc/BigtableChannelPool.java
@@ -17,7 +17,7 @@
import com.google.api.core.InternalApi;
import com.google.api.gax.grpc.ChannelFactory;
-import com.google.api.gax.grpc.ChannelPrimer;
+import com.google.cloud.bigtable.gaxx.grpc.ChannelPoolHealthChecker.ProbeResult;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
@@ -31,9 +31,11 @@
import io.grpc.MethodDescriptor;
import io.grpc.Status;
import java.io.IOException;
+import java.time.Clock;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.CancellationException;
+import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
@@ -64,9 +66,9 @@ public class BigtableChannelPool extends ManagedChannel {
private final ChannelPrimer channelPrimer;
private final ScheduledExecutorService executor;
-
private final Object entryWriteLock = new Object();
@VisibleForTesting final AtomicReference> entries = new AtomicReference<>();
+ private final ChannelPoolHealthChecker channelPoolHealthChecker;
private final AtomicInteger indexTicker = new AtomicInteger();
private final String authority;
@@ -96,6 +98,10 @@ public static BigtableChannelPool create(
this.settings = settings;
this.channelFactory = channelFactory;
this.channelPrimer = channelPrimer;
+ Clock systemClock = Clock.systemUTC();
+ this.channelPoolHealthChecker =
+ new ChannelPoolHealthChecker(entries::get, channelPrimer, executor, systemClock);
+ this.channelPoolHealthChecker.start();
ImmutableList.Builder initialListBuilder = ImmutableList.builder();
@@ -445,15 +451,32 @@ static class Entry {
private final AtomicInteger maxOutstanding = new AtomicInteger();
- // Flag that the channel should be closed once all of the outstanding RPC complete.
+ /** Queue storing the last 5 minutes of probe results */
+ @VisibleForTesting
+ final ConcurrentLinkedQueue probeHistory = new ConcurrentLinkedQueue<>();
+
+ /**
+ * Keep both # of failed and # of successful probes so that we don't have to check size() on the
+ * ConcurrentLinkedQueue all the time
+ */
+ final AtomicInteger failedProbesInWindow = new AtomicInteger();
+
+ final AtomicInteger successfulProbesInWindow = new AtomicInteger();
+
+ // Flag that the channel should be closed once all the outstanding RPCs complete.
private final AtomicBoolean shutdownRequested = new AtomicBoolean();
// Flag that the channel has been closed.
private final AtomicBoolean shutdownInitiated = new AtomicBoolean();
- private Entry(ManagedChannel channel) {
+ @VisibleForTesting
+ Entry(ManagedChannel channel) {
this.channel = channel;
}
+ ManagedChannel getManagedChannel() {
+ return this.channel;
+ }
+
int getAndResetMaxOutstanding() {
return maxOutstanding.getAndSet(outstandingRpcs.get());
}
@@ -468,7 +491,7 @@ private boolean retain() {
// register desire to start RPC
int currentOutstanding = outstandingRpcs.incrementAndGet();
- // Rough book keeping
+ // Rough bookkeeping
int prevMax = maxOutstanding.get();
if (currentOutstanding > prevMax) {
maxOutstanding.incrementAndGet();
diff --git a/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/gaxx/grpc/BigtableTransportChannelProvider.java b/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/gaxx/grpc/BigtableTransportChannelProvider.java
index 3c4cf24bca..ba18994619 100644
--- a/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/gaxx/grpc/BigtableTransportChannelProvider.java
+++ b/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/gaxx/grpc/BigtableTransportChannelProvider.java
@@ -18,7 +18,6 @@
import com.google.api.core.InternalApi;
import com.google.api.gax.grpc.ChannelFactory;
import com.google.api.gax.grpc.ChannelPoolSettings;
-import com.google.api.gax.grpc.ChannelPrimer;
import com.google.api.gax.grpc.GrpcTransportChannel;
import com.google.api.gax.grpc.InstantiatingGrpcChannelProvider;
import com.google.api.gax.rpc.TransportChannel;
diff --git a/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/gaxx/grpc/ChannelPoolHealthChecker.java b/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/gaxx/grpc/ChannelPoolHealthChecker.java
new file mode 100644
index 0000000000..cb0841e7a1
--- /dev/null
+++ b/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/gaxx/grpc/ChannelPoolHealthChecker.java
@@ -0,0 +1,252 @@
+/*
+ * Copyright 2025 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.google.cloud.bigtable.gaxx.grpc;
+
+import com.google.api.core.ApiFuture;
+import com.google.auto.value.AutoValue;
+import com.google.bigtable.v2.PingAndWarmResponse;
+import com.google.cloud.bigtable.data.v2.stub.BigtableChannelPrimer;
+import com.google.cloud.bigtable.gaxx.grpc.BigtableChannelPool.Entry;
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableList;
+import com.google.common.util.concurrent.MoreExecutors;
+import java.time.Clock;
+import java.time.Duration;
+import java.time.Instant;
+import java.util.Comparator;
+import java.util.List;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.ScheduledFuture;
+import java.util.concurrent.ThreadLocalRandom;
+import java.util.concurrent.TimeUnit;
+import java.util.function.Supplier;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+import java.util.stream.Collectors;
+import javax.annotation.Nullable;
+
+/** Class that manages the health checking in the BigtableChannelPool */
+class ChannelPoolHealthChecker {
+
+ private static final Logger logger = Logger.getLogger(ChannelPoolHealthChecker.class.getName());
+
+ // Configuration constants
+ // Window_Duration is the duration over which we keep probe results
+ private static final Duration WINDOW_DURATION = Duration.ofMinutes(5);
+ // Interval at which we probe channel health
+ private static final Duration PROBE_INTERVAL = Duration.ofSeconds(30);
+ // Timeout deadline for a probe
+ @VisibleForTesting static final Duration PROBE_DEADLINE = Duration.ofMillis(500);
+ // Minimum interval between new idle channel evictions
+ private static final Duration MIN_EVICTION_INTERVAL = Duration.ofMinutes(10);
+ // Minimum number of probes that must be sent to a channel before it will be considered for
+ // eviction
+ private static final int MIN_PROBES_FOR_EVALUATION = 4;
+ // Percentage of probes that must fail for a channel to be considered unhealthy
+ private static final int SINGLE_CHANNEL_FAILURE_PERCENT_THRESHOLD = 60;
+ // "Circuitbreaker" - If this or a higher percentage of channels in a pool are bad, we will not
+ // evict any channels
+ private static final int POOLWIDE_BAD_CHANNEL_CIRCUITBREAKER_PERCENT = 70;
+
+ /** Inner class to represent the result of a single probe. */
+ @AutoValue
+ abstract static class ProbeResult {
+ abstract Instant startTime();
+
+ abstract boolean isSuccessful();
+
+ static ProbeResult create(Instant startTime, boolean success) {
+ return new AutoValue_ChannelPoolHealthChecker_ProbeResult(startTime, success);
+ }
+ }
+
+ private final Supplier> entrySupplier;
+ private volatile Instant lastEviction;
+ private final ScheduledExecutorService executor;
+
+ private final ChannelPrimer channelPrimer;
+
+ private ScheduledFuture> probeTaskScheduledFuture;
+ private ScheduledFuture> detectAndRemoveTaskScheduledFuture;
+
+ private final Clock clock;
+
+ /** Constructor for the pool health checker. */
+ public ChannelPoolHealthChecker(
+ Supplier> entrySupplier,
+ ChannelPrimer channelPrimer,
+ ScheduledExecutorService executor,
+ Clock clock) {
+ this.entrySupplier = entrySupplier;
+ this.lastEviction = Instant.MIN;
+ this.channelPrimer = channelPrimer;
+ this.executor = executor;
+ this.clock = clock;
+ }
+
+ void start() {
+ if (!(channelPrimer instanceof BigtableChannelPrimer)) {
+ logger.log(
+ Level.WARNING,
+ "Provided channelPrimer not an instance of BigtableChannelPrimer, not checking channel health.");
+ return;
+ }
+
+ Duration initialDelayProbe =
+ Duration.ofMillis(ThreadLocalRandom.current().nextLong(PROBE_INTERVAL.toMillis()));
+ this.probeTaskScheduledFuture =
+ executor.scheduleAtFixedRate(
+ this::runProbes,
+ initialDelayProbe.toMillis(),
+ PROBE_INTERVAL.toMillis(),
+ TimeUnit.MILLISECONDS);
+ Duration initialDelayDetect =
+ Duration.ofMillis(ThreadLocalRandom.current().nextLong(PROBE_INTERVAL.toMillis()));
+ this.detectAndRemoveTaskScheduledFuture =
+ executor.scheduleAtFixedRate(
+ this::detectAndRemoveOutlierEntries,
+ initialDelayDetect.toMillis(),
+ PROBE_INTERVAL.toMillis(),
+ TimeUnit.MILLISECONDS);
+ }
+
+ /** Stop running health checking */
+ public void stop() {
+ if (probeTaskScheduledFuture != null) {
+ probeTaskScheduledFuture.cancel(false);
+ }
+ if (detectAndRemoveTaskScheduledFuture != null) {
+ detectAndRemoveTaskScheduledFuture.cancel(false);
+ }
+ }
+
+ /** Runs probes on all the channels in the pool. */
+ @VisibleForTesting
+ void runProbes() {
+ Preconditions.checkState(
+ channelPrimer instanceof BigtableChannelPrimer,
+ "Health checking can only be enabled with BigtableChannelPrimer, found %s",
+ channelPrimer);
+ BigtableChannelPrimer primer = (BigtableChannelPrimer) channelPrimer;
+
+ for (Entry entry : this.entrySupplier.get()) {
+ ApiFuture probeFuture =
+ primer.sendPrimeRequestsAsync(entry.getManagedChannel());
+ probeFuture.addListener(
+ () -> onComplete(entry, clock.instant(), probeFuture), MoreExecutors.directExecutor());
+ }
+ }
+
+ /** Callback that will update Entry data on probe complete. */
+ @VisibleForTesting
+ void onComplete(Entry entry, Instant startTime, ApiFuture probeFuture) {
+ boolean success;
+ try {
+ probeFuture.get(PROBE_DEADLINE.toMillis(), TimeUnit.MILLISECONDS);
+ success = true;
+ } catch (Exception e) {
+ success = false;
+ logger.log(Level.WARNING, "Probe failed", e);
+ }
+ addProbeResult(entry, ProbeResult.create(startTime, success));
+ }
+
+ @VisibleForTesting
+ void addProbeResult(Entry entry, ProbeResult result) {
+ entry.probeHistory.add(result);
+ if (result.isSuccessful()) {
+ entry.successfulProbesInWindow.incrementAndGet();
+ } else {
+ entry.failedProbesInWindow.incrementAndGet();
+ }
+ pruneHistory(entry);
+ }
+
+ @VisibleForTesting
+ void pruneHistory(Entry entry) {
+ Instant windowStart = clock.instant().minus(WINDOW_DURATION);
+ while (!entry.probeHistory.isEmpty()
+ && entry.probeHistory.peek().startTime().isBefore(windowStart)) {
+ ProbeResult removedResult = entry.probeHistory.poll();
+ if (removedResult.isSuccessful()) {
+ entry.successfulProbesInWindow.decrementAndGet();
+ } else {
+ entry.failedProbesInWindow.decrementAndGet();
+ }
+ }
+ }
+
+ /** Checks if a single entry is currently healthy based on its probe history. */
+ @VisibleForTesting
+ boolean isEntryHealthy(Entry entry) {
+ int failedProbes = entry.failedProbesInWindow.get();
+ int totalProbes = failedProbes + entry.successfulProbesInWindow.get();
+
+ if (totalProbes < MIN_PROBES_FOR_EVALUATION) {
+ return true; // Not enough data, assume healthy.
+ }
+
+ double failureRate = ((double) failedProbes / totalProbes) * 100.0;
+ return failureRate < SINGLE_CHANNEL_FAILURE_PERCENT_THRESHOLD;
+ }
+
+ /**
+ * Finds a channel that is an outlier in terms of health.
+ *
+ * @return the entry to be evicted. Returns null if nothing to evict.
+ */
+ @Nullable
+ @VisibleForTesting
+ Entry findOutlierEntry() {
+ List unhealthyEntries =
+ this.entrySupplier.get().stream()
+ .filter(entry -> !isEntryHealthy(entry))
+ .collect(Collectors.toList());
+
+ int poolSize = this.entrySupplier.get().size();
+ if (unhealthyEntries.isEmpty() || poolSize == 0) {
+ return null;
+ }
+
+ // If more than CIRCUITBREAKER_PERCENT of channels are unhealthy we won't evict
+ double unhealthyPercent = (double) unhealthyEntries.size() / poolSize * 100.0;
+ if (unhealthyPercent >= POOLWIDE_BAD_CHANNEL_CIRCUITBREAKER_PERCENT) {
+ return null;
+ }
+
+ return unhealthyEntries.stream()
+ .max(Comparator.comparingInt(entry -> entry.failedProbesInWindow.get()))
+ .orElse(null);
+ }
+
+ /** Periodically detects and removes outlier channels from the pool. */
+ @VisibleForTesting
+ void detectAndRemoveOutlierEntries() {
+ if (clock.instant().isBefore(lastEviction.plus(MIN_EVICTION_INTERVAL))) {
+ // Primitive but effective rate-limiting.
+ return;
+ }
+ Entry outlier = findOutlierEntry();
+ if (outlier != null) {
+ this.lastEviction = clock.instant();
+ outlier.failedProbesInWindow.set(0);
+ outlier.successfulProbesInWindow.set(0);
+ outlier.probeHistory.clear();
+ outlier.getManagedChannel().enterIdle();
+ }
+ }
+}
diff --git a/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/gaxx/grpc/ChannelPrimer.java b/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/gaxx/grpc/ChannelPrimer.java
new file mode 100644
index 0000000000..ea7cc70175
--- /dev/null
+++ b/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/gaxx/grpc/ChannelPrimer.java
@@ -0,0 +1,28 @@
+/*
+ * Copyright 2025 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.google.cloud.bigtable.gaxx.grpc;
+
+import com.google.api.core.ApiFuture;
+import com.google.api.core.InternalApi;
+import com.google.bigtable.v2.PingAndWarmResponse;
+import io.grpc.ManagedChannel;
+
+/**
+ * Primer for the channels of a Bigtable channel pool.
+ *
+ * <p>Offers a {@link #primeChannel(ManagedChannel)} entry point that returns nothing and an
+ * asynchronous variant that exposes the outcome as a future.
+ */
+@InternalApi("For internal use by google-cloud-java clients only")
+public interface ChannelPrimer {
+  /**
+   * Primes the given channel.
+   *
+   * @param channel the channel to prime
+   */
+  void primeChannel(ManagedChannel channel);
+
+  /**
+   * Sends the prime request on the given channel without waiting for the result.
+   *
+   * @param channel the channel to send the request on
+   * @return a future for the {@link PingAndWarmResponse}
+   */
+  ApiFuture<PingAndWarmResponse> sendPrimeRequestsAsync(ManagedChannel channel);
+}
diff --git a/google-cloud-bigtable/src/test/java/com/google/cloud/bigtable/data/v2/BigtableDataClientFactoryTest.java b/google-cloud-bigtable/src/test/java/com/google/cloud/bigtable/data/v2/BigtableDataClientFactoryTest.java
index 42746bbecc..c3d326fbef 100644
--- a/google-cloud-bigtable/src/test/java/com/google/cloud/bigtable/data/v2/BigtableDataClientFactoryTest.java
+++ b/google-cloud-bigtable/src/test/java/com/google/cloud/bigtable/data/v2/BigtableDataClientFactoryTest.java
@@ -27,7 +27,6 @@
import com.google.api.gax.rpc.WatchdogProvider;
import com.google.bigtable.v2.BigtableGrpc;
import com.google.bigtable.v2.FeatureFlags;
-import com.google.bigtable.v2.InstanceName;
import com.google.bigtable.v2.MutateRowRequest;
import com.google.bigtable.v2.MutateRowResponse;
import com.google.bigtable.v2.PingAndWarmRequest;
@@ -40,6 +39,7 @@
import com.google.common.base.Preconditions;
import com.google.common.io.BaseEncoding;
import io.grpc.Attributes;
+import io.grpc.Grpc;
import io.grpc.Metadata;
import io.grpc.Server;
import io.grpc.ServerCall;
@@ -50,9 +50,10 @@
import io.grpc.stub.StreamObserver;
import java.io.IOException;
import java.lang.reflect.Method;
-import java.util.LinkedList;
-import java.util.List;
+import java.net.SocketAddress;
import java.util.concurrent.BlockingQueue;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.LinkedBlockingDeque;
import org.junit.After;
import org.junit.Before;
@@ -87,6 +88,7 @@ public class BigtableDataClientFactoryTest {
private final BlockingQueue setUpAttributes = new LinkedBlockingDeque<>();
private final BlockingQueue terminateAttributes = new LinkedBlockingDeque<>();
private final BlockingQueue requestMetadata = new LinkedBlockingDeque<>();
+ private final ConcurrentMap warmedChannels = new ConcurrentHashMap<>();
@Before
public void setUp() throws IOException {
@@ -101,6 +103,15 @@ public Listener interceptCall(
Metadata headers,
ServerCallHandler next) {
requestMetadata.add(headers);
+
+ // Check if the call is PingAndWarm and mark the channel address as warmed up.
+ if (BigtableGrpc.getPingAndWarmMethod().equals(call.getMethodDescriptor())) {
+ SocketAddress remoteAddr =
+ call.getAttributes().get(Grpc.TRANSPORT_ATTR_REMOTE_ADDR);
+ if (remoteAddr != null) {
+ warmedChannels.put(remoteAddr, true);
+ }
+ }
return next.startCall(call, headers);
}
})
@@ -278,21 +289,8 @@ public void testCreateWithRefreshingChannel() throws Exception {
Mockito.verify(executorProvider, Mockito.times(1)).getExecutor();
Mockito.verify(watchdogProvider, Mockito.times(1)).getWatchdog();
- // Make sure that the clients are sharing the same ChannelPool
- assertThat(setUpAttributes).hasSize(poolSize);
-
- // Make sure that prime requests were sent only once per table per connection
- assertThat(service.pingAndWarmRequests).hasSize(poolSize);
- List expectedRequests = new LinkedList<>();
- for (int i = 0; i < poolSize; i++) {
- expectedRequests.add(
- PingAndWarmRequest.newBuilder()
- .setName(InstanceName.format(DEFAULT_PROJECT_ID, DEFAULT_INSTANCE_ID))
- .setAppProfileId(DEFAULT_APP_PROFILE_ID)
- .build());
- }
-
- assertThat(service.pingAndWarmRequests).containsExactly(expectedRequests.toArray());
+ assertThat(warmedChannels).hasSize(poolSize);
+ assertThat(warmedChannels.values()).doesNotContain(false);
// Wait for all the connections to close asynchronously
factory.close();
diff --git a/google-cloud-bigtable/src/test/java/com/google/cloud/bigtable/data/v2/stub/BigtableChannelPrimerTest.java b/google-cloud-bigtable/src/test/java/com/google/cloud/bigtable/data/v2/stub/BigtableChannelPrimerTest.java
index f29fa6200a..7913e97540 100644
--- a/google-cloud-bigtable/src/test/java/com/google/cloud/bigtable/data/v2/stub/BigtableChannelPrimerTest.java
+++ b/google-cloud-bigtable/src/test/java/com/google/cloud/bigtable/data/v2/stub/BigtableChannelPrimerTest.java
@@ -19,7 +19,7 @@
import static org.junit.Assert.assertThrows;
import com.google.api.core.ApiFunction;
-import com.google.api.core.SettableApiFuture;
+import com.google.api.core.ApiFuture;
import com.google.auth.oauth2.AccessToken;
import com.google.auth.oauth2.OAuth2Credentials;
import com.google.bigtable.v2.BigtableGrpc.BigtableImplBase;
@@ -173,7 +173,7 @@ public void testHeadersAreSent() {
// New test for the async success path
@Test
public void testAsyncSuccess() throws Exception {
- SettableApiFuture future = primer.sendPrimeRequestsAsync(channel);
+ ApiFuture future = primer.sendPrimeRequestsAsync(channel);
PingAndWarmResponse response = future.get(1, TimeUnit.SECONDS);
assertThat(response).isNotNull();
@@ -192,7 +192,7 @@ public PingAndWarmResponse apply(PingAndWarmRequest pingAndWarmRequest) {
}
};
- SettableApiFuture future = primer.sendPrimeRequestsAsync(channel);
+ ApiFuture future = primer.sendPrimeRequestsAsync(channel);
ExecutionException e =
assertThrows(ExecutionException.class, () -> future.get(5, TimeUnit.SECONDS));
diff --git a/google-cloud-bigtable/src/test/java/com/google/cloud/bigtable/gaxx/grpc/ChannelPoolHealthCheckerTest.java b/google-cloud-bigtable/src/test/java/com/google/cloud/bigtable/gaxx/grpc/ChannelPoolHealthCheckerTest.java
new file mode 100644
index 0000000000..6b748b1a59
--- /dev/null
+++ b/google-cloud-bigtable/src/test/java/com/google/cloud/bigtable/gaxx/grpc/ChannelPoolHealthCheckerTest.java
@@ -0,0 +1,192 @@
+/*
+ * Copyright 2025 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.google.cloud.bigtable.gaxx.grpc;
+
+import static com.google.common.truth.Truth.assertThat;
+
+import com.google.api.core.SettableApiFuture;
+import com.google.bigtable.v2.PingAndWarmResponse;
+import com.google.cloud.bigtable.data.v2.stub.BigtableChannelPrimer;
+import com.google.cloud.bigtable.gaxx.grpc.BigtableChannelPool.Entry;
+import com.google.cloud.bigtable.gaxx.grpc.ChannelPoolHealthChecker.ProbeResult;
+import com.google.common.collect.ImmutableList;
+import com.google.common.util.concurrent.ListeningScheduledExecutorService;
+import com.google.common.util.concurrent.testing.TestingExecutors;
+import io.grpc.ManagedChannel;
+import java.time.Clock;
+import java.time.Duration;
+import java.time.Instant;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.function.Supplier;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+import org.mockito.Mock;
+import org.mockito.Mockito;
+import org.mockito.junit.MockitoJUnit;
+import org.mockito.junit.MockitoRule;
+
+@RunWith(JUnit4.class)
+public class ChannelPoolHealthCheckerTest {
+ @Rule public MockitoRule mockitoRule = MockitoJUnit.rule();
+ @Mock private BigtableChannelPrimer mockPrimer;
+ private ListeningScheduledExecutorService executor;
+ @Mock private Clock mockClock;
+ private ChannelPoolHealthChecker healthChecker;
+ private List channelList;
+
+ @Before
+ public void setUp() {
+ executor = TestingExecutors.sameThreadScheduledExecutor();
+ channelList = new ArrayList<>();
+ Supplier> entrySupplier = () -> ImmutableList.copyOf(channelList);
+
+ healthChecker = new ChannelPoolHealthChecker(entrySupplier, mockPrimer, executor, mockClock);
+
+ // Default the clock to a fixed time
+ Mockito.when(mockClock.instant()).thenReturn(Instant.parse("2025-08-01T10:00:00Z"));
+ }
+
+ // Helper method to create test entries
+ private Entry createTestEntry() {
+ ManagedChannel mockChannel = Mockito.mock(ManagedChannel.class);
+ return new Entry(mockChannel);
+ }
+
+ @After
+ public void tearDown() {
+ executor.shutdownNow();
+ }
+
+ @Test
+ public void testOnComplete_successUpdatesCounters() {
+ Entry entry = createTestEntry();
+ channelList.add(entry);
+
+ SettableApiFuture successFuture = SettableApiFuture.create();
+ Mockito.when(mockPrimer.sendPrimeRequestsAsync(entry.getManagedChannel()))
+ .thenReturn(successFuture);
+
+ healthChecker.runProbes();
+
+ successFuture.set(PingAndWarmResponse.getDefaultInstance());
+
+ assertThat(entry.successfulProbesInWindow.get()).isEqualTo(1);
+ assertThat(entry.failedProbesInWindow.get()).isEqualTo(0);
+ }
+
+ @Test
+ public void testOnComplete_cancellationIsFailure() {
+ Entry entry = createTestEntry();
+ channelList.add(entry);
+
+ SettableApiFuture hangingFuture = SettableApiFuture.create();
+ Mockito.when(mockPrimer.sendPrimeRequestsAsync(entry.getManagedChannel()))
+ .thenReturn(hangingFuture);
+
+ healthChecker.runProbes();
+
+ hangingFuture.cancel(true);
+
+ assertThat(entry.failedProbesInWindow.get()).isEqualTo(1);
+ assertThat(entry.successfulProbesInWindow.get()).isEqualTo(0);
+ }
+
+ @Test
+ public void testPruning_removesOldProbesAndCounters() {
+ Entry entry = createTestEntry();
+ healthChecker.addProbeResult(entry, ProbeResult.create(mockClock.instant(), false));
+ assertThat(entry.failedProbesInWindow.get()).isEqualTo(1);
+
+ Instant newTime = mockClock.instant().plus(Duration.ofMinutes(6));
+ Mockito.when(mockClock.instant()).thenReturn(newTime);
+ healthChecker.pruneHistory(entry); // Manually call for direct testing
+
+ assertThat(entry.probeHistory).isEmpty();
+ assertThat(entry.failedProbesInWindow.get()).isEqualTo(0);
+ }
+
+ @Test
+ public void testEviction_selectsUnhealthyChannel() {
+ Entry healthyEntry = createTestEntry();
+ Entry badEntry = createTestEntry();
+ Entry worseEntry = createTestEntry();
+
+ // A channel needs at least 4 probes to be considered for eviction
+ healthyEntry.successfulProbesInWindow.set(10); // 0% failure -> healthy
+ badEntry.failedProbesInWindow.set(3); // 3/13 = 23% failure -> healthy
+ badEntry.successfulProbesInWindow.set(10);
+ worseEntry.failedProbesInWindow.set(10); // 10/10 = 100% failure -> unhealthy
+
+ channelList.addAll(Arrays.asList(healthyEntry, badEntry, worseEntry));
+
+ healthChecker.detectAndRemoveOutlierEntries();
+
+ // Assert that only the unhealthy channel was evicted
+ Mockito.verify(worseEntry.getManagedChannel()).enterIdle();
+ Mockito.verify(badEntry.getManagedChannel(), Mockito.never()).enterIdle();
+ Mockito.verify(healthyEntry.getManagedChannel(), Mockito.never()).enterIdle();
+ }
+
+ @Test
+ public void testEviction_selectsMostUnhealthyChannel() {
+ Entry healthyEntry = createTestEntry();
+ Entry badEntry = createTestEntry();
+ Entry worseEntry = createTestEntry();
+
+ // A channel needs at least 4 probes to be considered for eviction
+ healthyEntry.successfulProbesInWindow.set(10); // 0% failure -> healthy
+ badEntry.failedProbesInWindow.set(8); // 8/13 = 61% failure -> unhealthy
+ badEntry.successfulProbesInWindow.set(10);
+ worseEntry.failedProbesInWindow.set(10); // 10/10 = 100% failure -> most unhealthy
+
+ channelList.addAll(Arrays.asList(healthyEntry, badEntry, worseEntry));
+
+ healthChecker.detectAndRemoveOutlierEntries();
+
+ // Assert that only the unhealthy channel was evicted
+ Mockito.verify(worseEntry.getManagedChannel()).enterIdle();
+ Mockito.verify(badEntry.getManagedChannel(), Mockito.never()).enterIdle();
+ Mockito.verify(healthyEntry.getManagedChannel(), Mockito.never()).enterIdle();
+ }
+
+ @Test
+ public void testCircuitBreaker_preventsEviction() {
+ Entry entry1 = createTestEntry();
+ Entry entry2 = createTestEntry();
+ Entry entry3 = createTestEntry();
+ channelList.addAll(Arrays.asList(entry1, entry2, entry3));
+
+ // Set failure counts to exceed 60% SINGLE_CHANNEL_FAILURE_PERCENT_THRESHOLD with at least
+ // MIN_PROBES_FOR_EVALUATION (4) failures
+ for (Entry entry : channelList) {
+ entry.failedProbesInWindow.set(4); // 4 failures, 0 successes = 100% failure rate
+ }
+
+ healthChecker.detectAndRemoveOutlierEntries();
+
+ // The circuit breaker should engage because 3/3 channels (100%) are unhealthy,
+ // which is greater than the 70% POOLWIDE_BAD_CHANNEL_CIRCUITBREAKER_PERCENT threshold.
+ Mockito.verify(entry1.getManagedChannel(), Mockito.never()).enterIdle();
+ Mockito.verify(entry2.getManagedChannel(), Mockito.never()).enterIdle();
+ Mockito.verify(entry3.getManagedChannel(), Mockito.never()).enterIdle();
+ }
+}
diff --git a/google-cloud-bigtable/src/test/java/com/google/cloud/bigtable/gaxx/grpc/HealthChecker.java b/google-cloud-bigtable/src/test/java/com/google/cloud/bigtable/gaxx/grpc/HealthChecker.java
new file mode 100644
index 0000000000..5e8e00b040
--- /dev/null
+++ b/google-cloud-bigtable/src/test/java/com/google/cloud/bigtable/gaxx/grpc/HealthChecker.java
@@ -0,0 +1,25 @@
+/*
+ * Copyright 2025 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.google.cloud.bigtable.gaxx.grpc;
+
+import com.google.api.core.InternalApi;
+
+/**
+ * Minimal lifecycle contract for a health checker: it can be started and stopped.
+ *
+ * <p>NOTE(review): this file lives under the test source tree and nothing visible in this change
+ * implements it; confirm the intended location.
+ */
+@InternalApi
+public interface HealthChecker {
+
+  /** Starts health checking. */
+  void start();
+
+  /** Stops health checking. */
+  void stop();
+}