diff --git a/google-cloud-bigtable/clirr-ignored-differences.xml b/google-cloud-bigtable/clirr-ignored-differences.xml index 42ed3e3f7e..a5f9d8c3e6 100644 --- a/google-cloud-bigtable/clirr-ignored-differences.xml +++ b/google-cloud-bigtable/clirr-ignored-differences.xml @@ -426,4 +426,34 @@ *create* * + + 4001 + com/google/cloud/bigtable/data/v2/stub/BigtableChannelPrimer + com/google/api/gax/grpc/ChannelPrimer + + + 4001 + com/google/cloud/bigtable/data/v2/stub/NoOpChannelPrimer + com/google/api/gax/grpc/ChannelPrimer + + + 7005 + com/google/cloud/bigtable/gaxx/grpc/BigtableChannelPool + *create* + * + + + + 7005 + com/google/cloud/bigtable/gaxx/grpc/BigtableTransportChannelProvider + *create* + * + + + + 7006 + com/google/cloud/bigtable/data/v2/stub/BigtableChannelPrimer + *sendPrimeRequestsAsync* + com.google.api.core.ApiFuture + diff --git a/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/data/v2/stub/BigtableChannelPrimer.java b/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/data/v2/stub/BigtableChannelPrimer.java index 4ace6c7567..97c6e364c8 100644 --- a/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/data/v2/stub/BigtableChannelPrimer.java +++ b/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/data/v2/stub/BigtableChannelPrimer.java @@ -15,14 +15,15 @@ */ package com.google.cloud.bigtable.data.v2.stub; +import com.google.api.core.ApiFuture; import com.google.api.core.InternalApi; import com.google.api.core.SettableApiFuture; -import com.google.api.gax.grpc.ChannelPrimer; import com.google.auth.Credentials; import com.google.bigtable.v2.BigtableGrpc; import com.google.bigtable.v2.InstanceName; import com.google.bigtable.v2.PingAndWarmRequest; import com.google.bigtable.v2.PingAndWarmResponse; +import com.google.cloud.bigtable.gaxx.grpc.ChannelPrimer; import io.grpc.CallCredentials; import io.grpc.CallOptions; import io.grpc.ClientCall; @@ -110,8 +111,7 @@ private void 
sendPrimeRequestsBlocking(ManagedChannel managedChannel) { } } - public SettableApiFuture sendPrimeRequestsAsync( - ManagedChannel managedChannel) { + public ApiFuture sendPrimeRequestsAsync(ManagedChannel managedChannel) { ClientCall clientCall = managedChannel.newCall( BigtableGrpc.getPingAndWarmMethod(), diff --git a/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/data/v2/stub/BigtableClientContext.java b/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/data/v2/stub/BigtableClientContext.java index 233294fe4e..92a984a015 100644 --- a/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/data/v2/stub/BigtableClientContext.java +++ b/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/data/v2/stub/BigtableClientContext.java @@ -20,7 +20,6 @@ import com.google.api.gax.core.BackgroundResource; import com.google.api.gax.core.CredentialsProvider; import com.google.api.gax.core.FixedCredentialsProvider; -import com.google.api.gax.grpc.ChannelPrimer; import com.google.api.gax.grpc.InstantiatingGrpcChannelProvider; import com.google.api.gax.rpc.ClientContext; import com.google.auth.Credentials; @@ -34,6 +33,7 @@ import com.google.cloud.bigtable.data.v2.stub.metrics.MetricsProvider; import com.google.cloud.bigtable.data.v2.stub.metrics.NoopMetricsProvider; import com.google.cloud.bigtable.gaxx.grpc.BigtableTransportChannelProvider; +import com.google.cloud.bigtable.gaxx.grpc.ChannelPrimer; import io.grpc.ManagedChannelBuilder; import io.grpc.opentelemetry.GrpcOpenTelemetry; import io.opentelemetry.api.OpenTelemetry; diff --git a/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/data/v2/stub/NoOpChannelPrimer.java b/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/data/v2/stub/NoOpChannelPrimer.java index aed412fd0d..3cb98d9dee 100644 --- a/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/data/v2/stub/NoOpChannelPrimer.java +++ 
b/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/data/v2/stub/NoOpChannelPrimer.java @@ -15,8 +15,11 @@ */ package com.google.cloud.bigtable.data.v2.stub; +import com.google.api.core.ApiFuture; import com.google.api.core.InternalApi; -import com.google.api.gax.grpc.ChannelPrimer; +import com.google.api.core.SettableApiFuture; +import com.google.bigtable.v2.PingAndWarmResponse; +import com.google.cloud.bigtable.gaxx.grpc.ChannelPrimer; import io.grpc.ManagedChannel; @InternalApi @@ -28,7 +31,14 @@ static NoOpChannelPrimer create() { private NoOpChannelPrimer() {} @Override - public void primeChannel(ManagedChannel managedChannel) { + public void primeChannel(ManagedChannel channel) { // No op } + + @Override + public ApiFuture sendPrimeRequestsAsync(ManagedChannel channel) { + SettableApiFuture future = SettableApiFuture.create(); + future.set(PingAndWarmResponse.getDefaultInstance()); + return future; + } } diff --git a/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/gaxx/grpc/BigtableChannelPool.java b/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/gaxx/grpc/BigtableChannelPool.java index da7bd4f956..c8ced11158 100644 --- a/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/gaxx/grpc/BigtableChannelPool.java +++ b/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/gaxx/grpc/BigtableChannelPool.java @@ -17,7 +17,7 @@ import com.google.api.core.InternalApi; import com.google.api.gax.grpc.ChannelFactory; -import com.google.api.gax.grpc.ChannelPrimer; +import com.google.cloud.bigtable.gaxx.grpc.ChannelPoolHealthChecker.ProbeResult; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; @@ -31,9 +31,11 @@ import io.grpc.MethodDescriptor; import io.grpc.Status; import java.io.IOException; +import java.time.Clock; import java.util.ArrayList; import java.util.List; import java.util.concurrent.CancellationException; 
+import java.util.concurrent.ConcurrentLinkedQueue; import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.TimeUnit; @@ -64,9 +66,9 @@ public class BigtableChannelPool extends ManagedChannel { private final ChannelPrimer channelPrimer; private final ScheduledExecutorService executor; - private final Object entryWriteLock = new Object(); @VisibleForTesting final AtomicReference> entries = new AtomicReference<>(); + private final ChannelPoolHealthChecker channelPoolHealthChecker; private final AtomicInteger indexTicker = new AtomicInteger(); private final String authority; @@ -96,6 +98,10 @@ public static BigtableChannelPool create( this.settings = settings; this.channelFactory = channelFactory; this.channelPrimer = channelPrimer; + Clock systemClock = Clock.systemUTC(); + this.channelPoolHealthChecker = + new ChannelPoolHealthChecker(entries::get, channelPrimer, executor, systemClock); + this.channelPoolHealthChecker.start(); ImmutableList.Builder initialListBuilder = ImmutableList.builder(); @@ -445,15 +451,32 @@ static class Entry { private final AtomicInteger maxOutstanding = new AtomicInteger(); - // Flag that the channel should be closed once all of the outstanding RPC complete. + /** Queue storing the last 5 minutes of probe results */ + @VisibleForTesting + final ConcurrentLinkedQueue probeHistory = new ConcurrentLinkedQueue<>(); + + /** + * Keep both # of failed and # of successful probes so that we don't have to check size() on the + * ConcurrentLinkedQueue all the time + */ + final AtomicInteger failedProbesInWindow = new AtomicInteger(); + + final AtomicInteger successfulProbesInWindow = new AtomicInteger(); + + // Flag that the channel should be closed once all the outstanding RPCs complete. private final AtomicBoolean shutdownRequested = new AtomicBoolean(); // Flag that the channel has been closed. 
private final AtomicBoolean shutdownInitiated = new AtomicBoolean(); - private Entry(ManagedChannel channel) { + @VisibleForTesting + Entry(ManagedChannel channel) { this.channel = channel; } + ManagedChannel getManagedChannel() { + return this.channel; + } + int getAndResetMaxOutstanding() { return maxOutstanding.getAndSet(outstandingRpcs.get()); } @@ -468,7 +491,7 @@ private boolean retain() { // register desire to start RPC int currentOutstanding = outstandingRpcs.incrementAndGet(); - // Rough book keeping + // Rough bookkeeping int prevMax = maxOutstanding.get(); if (currentOutstanding > prevMax) { maxOutstanding.incrementAndGet(); diff --git a/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/gaxx/grpc/BigtableTransportChannelProvider.java b/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/gaxx/grpc/BigtableTransportChannelProvider.java index 3c4cf24bca..ba18994619 100644 --- a/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/gaxx/grpc/BigtableTransportChannelProvider.java +++ b/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/gaxx/grpc/BigtableTransportChannelProvider.java @@ -18,7 +18,6 @@ import com.google.api.core.InternalApi; import com.google.api.gax.grpc.ChannelFactory; import com.google.api.gax.grpc.ChannelPoolSettings; -import com.google.api.gax.grpc.ChannelPrimer; import com.google.api.gax.grpc.GrpcTransportChannel; import com.google.api.gax.grpc.InstantiatingGrpcChannelProvider; import com.google.api.gax.rpc.TransportChannel; diff --git a/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/gaxx/grpc/ChannelPoolHealthChecker.java b/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/gaxx/grpc/ChannelPoolHealthChecker.java new file mode 100644 index 0000000000..cb0841e7a1 --- /dev/null +++ b/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/gaxx/grpc/ChannelPoolHealthChecker.java @@ -0,0 +1,252 @@ +/* + * Copyright 2025 Google LLC + * + * Licensed under the Apache 
License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.google.cloud.bigtable.gaxx.grpc; + +import com.google.api.core.ApiFuture; +import com.google.auto.value.AutoValue; +import com.google.bigtable.v2.PingAndWarmResponse; +import com.google.cloud.bigtable.data.v2.stub.BigtableChannelPrimer; +import com.google.cloud.bigtable.gaxx.grpc.BigtableChannelPool.Entry; +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; +import com.google.common.util.concurrent.MoreExecutors; +import java.time.Clock; +import java.time.Duration; +import java.time.Instant; +import java.util.Comparator; +import java.util.List; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.ScheduledFuture; +import java.util.concurrent.ThreadLocalRandom; +import java.util.concurrent.TimeUnit; +import java.util.function.Supplier; +import java.util.logging.Level; +import java.util.logging.Logger; +import java.util.stream.Collectors; +import javax.annotation.Nullable; + +/** Class that manages the health checking in the BigtableChannelPool */ +class ChannelPoolHealthChecker { + + private static final Logger logger = Logger.getLogger(ChannelPoolHealthChecker.class.getName()); + + // Configuration constants + // Window_Duration is the duration over which we keep probe results + private static final Duration WINDOW_DURATION = Duration.ofMinutes(5); + // Interval at which we probe channel health + 
private static final Duration PROBE_INTERVAL = Duration.ofSeconds(30);
+  // Timeout deadline for a probe
+  @VisibleForTesting static final Duration PROBE_DEADLINE = Duration.ofMillis(500);
+  // Minimum interval between new idle channel evictions
+  private static final Duration MIN_EVICTION_INTERVAL = Duration.ofMinutes(10);
+  // Minimum number of probes that must be sent to a channel before it will be considered for
+  // eviction
+  private static final int MIN_PROBES_FOR_EVALUATION = 4;
+  // Percentage of probes that must fail for a channel to be considered unhealthy
+  private static final int SINGLE_CHANNEL_FAILURE_PERCENT_THRESHOLD = 60;
+  // "Circuitbreaker" - If this or a higher percentage of channels in a pool are bad, we will not
+  // evict any channels
+  private static final int POOLWIDE_BAD_CHANNEL_CIRCUITBREAKER_PERCENT = 70;
+
+  /**
+   * Immutable value describing the outcome of a single probe: the timestamp attached to it and
+   * whether it succeeded. The timestamp is what ages a result out of the sliding window.
+   */
+  @AutoValue
+  abstract static class ProbeResult {
+    /** Timestamp recorded for the probe; compared against the window start when pruning. */
+    abstract Instant startTime();
+
+    /** Whether the probe completed without an error. */
+    abstract boolean isSuccessful();
+
+    /** Creates a result with the given timestamp and outcome. */
+    static ProbeResult create(Instant startTime, boolean success) {
+      return new AutoValue_ChannelPoolHealthChecker_ProbeResult(startTime, success);
+    }
+  }
+
+  // Supplies the pool's current entry list each time it is consulted.
+  private final Supplier<ImmutableList<Entry>> entrySupplier;
+  // Time of the most recent eviction; volatile because it is read and written without a lock.
+  private volatile Instant lastEviction;
+  // Runs the periodic probe and eviction tasks.
+  private final ScheduledExecutorService executor;
+
+  // Health checking is only active when this is a BigtableChannelPrimer (see start()).
+  private final ChannelPrimer channelPrimer;
+
+  // Handles for the two periodic tasks; null until start() schedules them.
+  private ScheduledFuture<?> probeTaskScheduledFuture;
+  private ScheduledFuture<?> detectAndRemoveTaskScheduledFuture;
+
+  // Injected time source so tests can control the window and the eviction rate limit.
+  private final Clock clock;
+
+  /** Constructor for the pool health checker. 
*/
+  public ChannelPoolHealthChecker(
+      Supplier<ImmutableList<Entry>> entrySupplier,
+      ChannelPrimer channelPrimer,
+      ScheduledExecutorService executor,
+      Clock clock) {
+    this.entrySupplier = entrySupplier;
+    // Instant.MIN means "never evicted", so the first eviction is not rate limited.
+    this.lastEviction = Instant.MIN;
+    this.channelPrimer = channelPrimer;
+    this.executor = executor;
+    this.clock = clock;
+  }
+
+  /**
+   * Schedules the periodic probe task and the periodic outlier-detection task on the executor.
+   *
+   * <p>Does nothing except log a warning when the primer is not a {@link BigtableChannelPrimer}.
+   * Both tasks repeat every {@code PROBE_INTERVAL} and each starts after its own random delay
+   * shorter than one interval.
+   */
+  void start() {
+    if (!(channelPrimer instanceof BigtableChannelPrimer)) {
+      logger.log(
+          Level.WARNING,
+          "Provided channelPrimer not an instance of BigtableChannelPrimer, not checking channel health.");
+      return;
+    }
+
+    Duration initialDelayProbe =
+        Duration.ofMillis(ThreadLocalRandom.current().nextLong(PROBE_INTERVAL.toMillis()));
+    this.probeTaskScheduledFuture =
+        executor.scheduleAtFixedRate(
+            () -> runGuarded("probe", this::runProbes),
+            initialDelayProbe.toMillis(),
+            PROBE_INTERVAL.toMillis(),
+            TimeUnit.MILLISECONDS);
+    Duration initialDelayDetect =
+        Duration.ofMillis(ThreadLocalRandom.current().nextLong(PROBE_INTERVAL.toMillis()));
+    this.detectAndRemoveTaskScheduledFuture =
+        executor.scheduleAtFixedRate(
+            () -> runGuarded("outlier detection", this::detectAndRemoveOutlierEntries),
+            initialDelayDetect.toMillis(),
+            PROBE_INTERVAL.toMillis(),
+            TimeUnit.MILLISECONDS);
+  }
+
+  /**
+   * Runs one iteration of a periodic task and logs, rather than propagates, any runtime failure.
+   *
+   * <p>scheduleAtFixedRate suppresses all later executions once a run throws, so an uncaught
+   * exception would silently switch health checking off for the lifetime of the pool.
+   */
+  private void runGuarded(String taskName, Runnable task) {
+    try {
+      task.run();
+    } catch (RuntimeException e) {
+      logger.log(Level.WARNING, "Channel health " + taskName + " task failed", e);
+    }
+  }
+
+  /**
+   * Stops health checking by cancelling both periodic tasks without interrupting a run that is
+   * already in progress. Safe to call when {@code start()} never scheduled anything.
+   */
+  public void stop() {
+    if (probeTaskScheduledFuture != null) {
+      probeTaskScheduledFuture.cancel(false);
+    }
+    if (detectAndRemoveTaskScheduledFuture != null) {
+      detectAndRemoveTaskScheduledFuture.cancel(false);
+    }
+  }
+
+  /** Runs probes on all the channels in the pool. 
*/
+  @VisibleForTesting
+  void runProbes() {
+    Preconditions.checkState(
+        channelPrimer instanceof BigtableChannelPrimer,
+        "Health checking can only be enabled with BigtableChannelPrimer, found %s",
+        channelPrimer);
+    BigtableChannelPrimer primer = (BigtableChannelPrimer) channelPrimer;
+
+    ImmutableList<Entry> entries = this.entrySupplier.get();
+    if (entries == null) {
+      // BigtableChannelPool starts the checker before it publishes its first entry list, so an
+      // early tick can observe no list at all; skip this round instead of throwing.
+      return;
+    }
+
+    for (Entry entry : entries) {
+      // Read the clock before sending: evaluating it inside the listener would stamp the result
+      // with the completion time rather than the start time.
+      Instant startTime = clock.instant();
+      ApiFuture<PingAndWarmResponse> probeFuture =
+          primer.sendPrimeRequestsAsync(entry.getManagedChannel());
+      probeFuture.addListener(
+          () -> onComplete(entry, startTime, probeFuture), MoreExecutors.directExecutor());
+    }
+  }
+
+  /**
+   * Callback that records the outcome of a finished probe on its entry.
+   *
+   * @param entry the pool entry whose channel was probed
+   * @param startTime timestamp to attach to the recorded result
+   * @param probeFuture the probe's future; any exception from reading it counts as a failure
+   */
+  @VisibleForTesting
+  void onComplete(Entry entry, Instant startTime, ApiFuture<PingAndWarmResponse> probeFuture) {
+    boolean success;
+    try {
+      // NOTE(review): when invoked from the completion listener the future is already done, so
+      // this timeout never waits; presumably the real deadline is applied by the primer - confirm.
+      probeFuture.get(PROBE_DEADLINE.toMillis(), TimeUnit.MILLISECONDS);
+      success = true;
+    } catch (InterruptedException e) {
+      // Keep the interrupt visible to whoever owns this thread; still count the probe as failed.
+      Thread.currentThread().interrupt();
+      success = false;
+      logger.log(Level.WARNING, "Probe failed", e);
+    } catch (Exception e) {
+      success = false;
+      logger.log(Level.WARNING, "Probe failed", e);
+    }
+    addProbeResult(entry, ProbeResult.create(startTime, success));
+  }
+
+  /** Appends a result to the entry's history, bumps the matching counter, then prunes. */
+  @VisibleForTesting
+  void addProbeResult(Entry entry, ProbeResult result) {
+    entry.probeHistory.add(result);
+    if (result.isSuccessful()) {
+      entry.successfulProbesInWindow.incrementAndGet();
+    } else {
+      entry.failedProbesInWindow.incrementAndGet();
+    }
+    pruneHistory(entry);
+  }
+
+  /**
+   * Drops results older than {@code WINDOW_DURATION} from the head of the entry's history and
+   * decrements the counter matching each dropped result.
+   */
+  @VisibleForTesting
+  void pruneHistory(Entry entry) {
+    Instant windowStart = clock.instant().minus(WINDOW_DURATION);
+    ProbeResult oldest;
+    // Peek once per iteration and null-check it: the history can be cleared by the eviction task
+    // between a separate isEmpty() and peek(), which would otherwise dereference null.
+    while ((oldest = entry.probeHistory.peek()) != null
+        && oldest.startTime().isBefore(windowStart)) {
+      if (!entry.probeHistory.remove(oldest)) {
+        // Another thread removed it first; it owns the counter update. Re-check the new head.
+        continue;
+      }
+      if (oldest.isSuccessful()) {
+        entry.successfulProbesInWindow.decrementAndGet();
+      } else {
+        entry.failedProbesInWindow.decrementAndGet();
+      }
+    }
+  }
+
+  /** Checks if a single entry is currently healthy based on its probe history. 
*/
+  @VisibleForTesting
+  boolean isEntryHealthy(Entry entry) {
+    int failedProbes = entry.failedProbesInWindow.get();
+    int totalProbes = failedProbes + entry.successfulProbesInWindow.get();
+
+    if (totalProbes < MIN_PROBES_FOR_EVALUATION) {
+      return true; // Not enough data, assume healthy.
+    }
+
+    // Unhealthy once the failure percentage reaches the threshold (the comparison is strict <).
+    double failureRate = ((double) failedProbes / totalProbes) * 100.0;
+    return failureRate < SINGLE_CHANNEL_FAILURE_PERCENT_THRESHOLD;
+  }
+
+  /**
+   * Finds a channel that is an outlier in terms of health.
+   *
+   * <p>Among the unhealthy entries, the one with the highest number of failed probes in the
+   * window is chosen (the count, not the failure percentage).
+   *
+   * @return the entry to be evicted. Returns null if nothing to evict, including when the pool
+   *     has no entries or when the circuit breaker applies.
+   */
+  @Nullable
+  @VisibleForTesting
+  Entry findOutlierEntry() {
+    // Take one snapshot so the unhealthy list and the pool size describe the same entry list.
+    ImmutableList<Entry> entries = this.entrySupplier.get();
+    if (entries == null || entries.isEmpty()) {
+      return null;
+    }
+
+    List<Entry> unhealthyEntries =
+        entries.stream().filter(entry -> !isEntryHealthy(entry)).collect(Collectors.toList());
+    if (unhealthyEntries.isEmpty()) {
+      return null;
+    }
+
+    // If CIRCUITBREAKER_PERCENT or more of the channels are unhealthy we won't evict
+    double unhealthyPercent = (double) unhealthyEntries.size() / entries.size() * 100.0;
+    if (unhealthyPercent >= POOLWIDE_BAD_CHANNEL_CIRCUITBREAKER_PERCENT) {
+      return null;
+    }
+
+    return unhealthyEntries.stream()
+        .max(Comparator.comparingInt(entry -> entry.failedProbesInWindow.get()))
+        .orElse(null);
+  }
+
+  /**
+   * Periodic task that looks for one outlier entry and, if found, resets its probe statistics and
+   * calls {@code enterIdle()} on its channel. The entry itself is not removed from the pool here.
+   *
+   * <p>At most one entry is handled per {@code MIN_EVICTION_INTERVAL}.
+   */
+  @VisibleForTesting
+  void detectAndRemoveOutlierEntries() {
+    if (clock.instant().isBefore(lastEviction.plus(MIN_EVICTION_INTERVAL))) {
+      // Primitive but effective rate-limiting.
+      return;
+    }
+    Entry outlier = findOutlierEntry();
+    if (outlier != null) {
+      this.lastEviction = clock.instant();
+      // Start the entry with a clean slate so stale failures do not trigger another eviction.
+      outlier.failedProbesInWindow.set(0);
+      outlier.successfulProbesInWindow.set(0);
+      outlier.probeHistory.clear();
+      // NOTE(review): presumably this makes gRPC drop and later re-establish the connection -
+      // confirm against the ManagedChannel documentation.
+      outlier.getManagedChannel().enterIdle();
+    }
+  }
+}
diff --git a/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/gaxx/grpc/ChannelPrimer.java b/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/gaxx/grpc/ChannelPrimer.java
new file mode 100644
index 0000000000..ea7cc70175
--- /dev/null
+++ b/google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/gaxx/grpc/ChannelPrimer.java
@@ -0,0 +1,28 @@
+/*
+ * Copyright 2025 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.google.cloud.bigtable.gaxx.grpc;
+
+import com.google.api.core.ApiFuture;
+import com.google.api.core.InternalApi;
+import com.google.bigtable.v2.PingAndWarmResponse;
+import io.grpc.ManagedChannel;
+
+/**
+ * Bigtable-local channel primer contract with a synchronous and an asynchronous entry point.
+ *
+ * <p>Implemented in this change by {@code BigtableChannelPrimer} and {@code NoOpChannelPrimer}.
+ */
+@InternalApi("For internal use by google-cloud-java clients only")
+public interface ChannelPrimer {
+  /**
+   * Primes the given channel.
+   *
+   * @param channel the channel to prime
+   */
+  void primeChannel(ManagedChannel channel);
+
+  /**
+   * Starts priming the given channel without waiting for the outcome.
+   *
+   * @param channel the channel to prime
+   * @return a future for the priming response
+   */
+  ApiFuture<PingAndWarmResponse> sendPrimeRequestsAsync(ManagedChannel channel);
+}
diff --git a/google-cloud-bigtable/src/test/java/com/google/cloud/bigtable/data/v2/BigtableDataClientFactoryTest.java b/google-cloud-bigtable/src/test/java/com/google/cloud/bigtable/data/v2/BigtableDataClientFactoryTest.java
index 42746bbecc..c3d326fbef 100644
--- a/google-cloud-bigtable/src/test/java/com/google/cloud/bigtable/data/v2/BigtableDataClientFactoryTest.java
+++ b/google-cloud-bigtable/src/test/java/com/google/cloud/bigtable/data/v2/BigtableDataClientFactoryTest.java
@@ -27,7 +27,6 @@
 import com.google.api.gax.rpc.WatchdogProvider;
 import com.google.bigtable.v2.BigtableGrpc;
 import com.google.bigtable.v2.FeatureFlags;
-import com.google.bigtable.v2.InstanceName;
 import com.google.bigtable.v2.MutateRowRequest;
 import com.google.bigtable.v2.MutateRowResponse;
 import com.google.bigtable.v2.PingAndWarmRequest;
@@ -40,6 +39,7 @@
 import com.google.common.base.Preconditions;
 import com.google.common.io.BaseEncoding;
 import io.grpc.Attributes;
+import io.grpc.Grpc;
 import io.grpc.Metadata;
 import io.grpc.Server;
 import io.grpc.ServerCall;
@@ -50,9 +50,10 @@
 import io.grpc.stub.StreamObserver;
 import java.io.IOException;
 import java.lang.reflect.Method;
-import java.util.LinkedList;
-import java.util.List;
+import java.net.SocketAddress;
 import java.util.concurrent.BlockingQueue;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentMap;
 import java.util.concurrent.LinkedBlockingDeque;
 import org.junit.After;
 import org.junit.Before;
@@ -87,6 +88,7 @@ public class BigtableDataClientFactoryTest {
   private 
final BlockingQueue setUpAttributes = new LinkedBlockingDeque<>(); private final BlockingQueue terminateAttributes = new LinkedBlockingDeque<>(); private final BlockingQueue requestMetadata = new LinkedBlockingDeque<>(); + private final ConcurrentMap warmedChannels = new ConcurrentHashMap<>(); @Before public void setUp() throws IOException { @@ -101,6 +103,15 @@ public Listener interceptCall( Metadata headers, ServerCallHandler next) { requestMetadata.add(headers); + + // Check if the call is PingAndWarm and mark the channel address as warmed up. + if (BigtableGrpc.getPingAndWarmMethod().equals(call.getMethodDescriptor())) { + SocketAddress remoteAddr = + call.getAttributes().get(Grpc.TRANSPORT_ATTR_REMOTE_ADDR); + if (remoteAddr != null) { + warmedChannels.put(remoteAddr, true); + } + } return next.startCall(call, headers); } }) @@ -278,21 +289,8 @@ public void testCreateWithRefreshingChannel() throws Exception { Mockito.verify(executorProvider, Mockito.times(1)).getExecutor(); Mockito.verify(watchdogProvider, Mockito.times(1)).getWatchdog(); - // Make sure that the clients are sharing the same ChannelPool - assertThat(setUpAttributes).hasSize(poolSize); - - // Make sure that prime requests were sent only once per table per connection - assertThat(service.pingAndWarmRequests).hasSize(poolSize); - List expectedRequests = new LinkedList<>(); - for (int i = 0; i < poolSize; i++) { - expectedRequests.add( - PingAndWarmRequest.newBuilder() - .setName(InstanceName.format(DEFAULT_PROJECT_ID, DEFAULT_INSTANCE_ID)) - .setAppProfileId(DEFAULT_APP_PROFILE_ID) - .build()); - } - - assertThat(service.pingAndWarmRequests).containsExactly(expectedRequests.toArray()); + assertThat(warmedChannels).hasSize(poolSize); + assertThat(warmedChannels.values()).doesNotContain(false); // Wait for all the connections to close asynchronously factory.close(); diff --git a/google-cloud-bigtable/src/test/java/com/google/cloud/bigtable/data/v2/stub/BigtableChannelPrimerTest.java 
b/google-cloud-bigtable/src/test/java/com/google/cloud/bigtable/data/v2/stub/BigtableChannelPrimerTest.java index f29fa6200a..7913e97540 100644 --- a/google-cloud-bigtable/src/test/java/com/google/cloud/bigtable/data/v2/stub/BigtableChannelPrimerTest.java +++ b/google-cloud-bigtable/src/test/java/com/google/cloud/bigtable/data/v2/stub/BigtableChannelPrimerTest.java @@ -19,7 +19,7 @@ import static org.junit.Assert.assertThrows; import com.google.api.core.ApiFunction; -import com.google.api.core.SettableApiFuture; +import com.google.api.core.ApiFuture; import com.google.auth.oauth2.AccessToken; import com.google.auth.oauth2.OAuth2Credentials; import com.google.bigtable.v2.BigtableGrpc.BigtableImplBase; @@ -173,7 +173,7 @@ public void testHeadersAreSent() { // New test for the async success path @Test public void testAsyncSuccess() throws Exception { - SettableApiFuture future = primer.sendPrimeRequestsAsync(channel); + ApiFuture future = primer.sendPrimeRequestsAsync(channel); PingAndWarmResponse response = future.get(1, TimeUnit.SECONDS); assertThat(response).isNotNull(); @@ -192,7 +192,7 @@ public PingAndWarmResponse apply(PingAndWarmRequest pingAndWarmRequest) { } }; - SettableApiFuture future = primer.sendPrimeRequestsAsync(channel); + ApiFuture future = primer.sendPrimeRequestsAsync(channel); ExecutionException e = assertThrows(ExecutionException.class, () -> future.get(5, TimeUnit.SECONDS)); diff --git a/google-cloud-bigtable/src/test/java/com/google/cloud/bigtable/gaxx/grpc/ChannelPoolHealthCheckerTest.java b/google-cloud-bigtable/src/test/java/com/google/cloud/bigtable/gaxx/grpc/ChannelPoolHealthCheckerTest.java new file mode 100644 index 0000000000..6b748b1a59 --- /dev/null +++ b/google-cloud-bigtable/src/test/java/com/google/cloud/bigtable/gaxx/grpc/ChannelPoolHealthCheckerTest.java @@ -0,0 +1,192 @@ +/* + * Copyright 2025 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance 
with the License.
+ * You may obtain a copy of the License at
+ *
+ *     https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.google.cloud.bigtable.gaxx.grpc;
+
+import static com.google.common.truth.Truth.assertThat;
+
+import com.google.api.core.SettableApiFuture;
+import com.google.bigtable.v2.PingAndWarmResponse;
+import com.google.cloud.bigtable.data.v2.stub.BigtableChannelPrimer;
+import com.google.cloud.bigtable.gaxx.grpc.BigtableChannelPool.Entry;
+import com.google.cloud.bigtable.gaxx.grpc.ChannelPoolHealthChecker.ProbeResult;
+import com.google.common.collect.ImmutableList;
+import com.google.common.util.concurrent.ListeningScheduledExecutorService;
+import com.google.common.util.concurrent.testing.TestingExecutors;
+import io.grpc.ManagedChannel;
+import java.time.Clock;
+import java.time.Duration;
+import java.time.Instant;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.function.Supplier;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+import org.mockito.Mock;
+import org.mockito.Mockito;
+import org.mockito.junit.MockitoJUnit;
+import org.mockito.junit.MockitoRule;
+
+/**
+ * Unit tests for {@link ChannelPoolHealthChecker}: probe bookkeeping, window pruning, outlier
+ * selection and the pool-wide circuit breaker. The periodic tasks are never started; each test
+ * calls the package-private methods directly with a mocked clock and primer.
+ */
+@RunWith(JUnit4.class)
+public class ChannelPoolHealthCheckerTest {
+  @Rule public MockitoRule mockitoRule = MockitoJUnit.rule();
+  @Mock private BigtableChannelPrimer mockPrimer;
+  private ListeningScheduledExecutorService executor;
+  @Mock private Clock mockClock;
+  private ChannelPoolHealthChecker healthChecker;
+  // Mutable backing list; the supplier hands the checker an immutable copy on every call.
+  private List<Entry> channelList;
+
+  @Before
+  public void setUp() {
+    executor = TestingExecutors.sameThreadScheduledExecutor();
+    channelList = new ArrayList<>();
+    Supplier<ImmutableList<Entry>> entrySupplier = () -> ImmutableList.copyOf(channelList);
+
+    healthChecker = new ChannelPoolHealthChecker(entrySupplier, mockPrimer, executor, mockClock);
+
+    // Default the clock to a fixed time
+    Mockito.when(mockClock.instant()).thenReturn(Instant.parse("2025-08-01T10:00:00Z"));
+  }
+
+  // Helper method to create test entries
+  private Entry createTestEntry() {
+    ManagedChannel mockChannel = Mockito.mock(ManagedChannel.class);
+    return new Entry(mockChannel);
+  }
+
+  @After
+  public void tearDown() {
+    executor.shutdownNow();
+  }
+
+  /** A probe whose future completes normally is counted as one success and no failure. */
+  @Test
+  public void testOnComplete_successUpdatesCounters() {
+    Entry entry = createTestEntry();
+    channelList.add(entry);
+
+    SettableApiFuture<PingAndWarmResponse> successFuture = SettableApiFuture.create();
+    Mockito.when(mockPrimer.sendPrimeRequestsAsync(entry.getManagedChannel()))
+        .thenReturn(successFuture);
+
+    healthChecker.runProbes();
+
+    successFuture.set(PingAndWarmResponse.getDefaultInstance());
+
+    assertThat(entry.successfulProbesInWindow.get()).isEqualTo(1);
+    assertThat(entry.failedProbesInWindow.get()).isEqualTo(0);
+  }
+
+  /** A probe whose future is cancelled is counted as one failure and no success. */
+  @Test
+  public void testOnComplete_cancellationIsFailure() {
+    Entry entry = createTestEntry();
+    channelList.add(entry);
+
+    SettableApiFuture<PingAndWarmResponse> hangingFuture = SettableApiFuture.create();
+    Mockito.when(mockPrimer.sendPrimeRequestsAsync(entry.getManagedChannel()))
+        .thenReturn(hangingFuture);
+
+    healthChecker.runProbes();
+
+    hangingFuture.cancel(true);
+
+    assertThat(entry.failedProbesInWindow.get()).isEqualTo(1);
+    assertThat(entry.successfulProbesInWindow.get()).isEqualTo(0);
+  }
+
+  /** Results older than the five-minute window are dropped and their counters decremented. */
+  @Test
+  public void testPruning_removesOldProbesAndCounters() {
+    Entry entry = createTestEntry();
+    healthChecker.addProbeResult(entry, ProbeResult.create(mockClock.instant(), false));
+    assertThat(entry.failedProbesInWindow.get()).isEqualTo(1);
+
+    Instant newTime = mockClock.instant().plus(Duration.ofMinutes(6));
+    Mockito.when(mockClock.instant()).thenReturn(newTime);
+    healthChecker.pruneHistory(entry); // Manually call for direct testing
+
+    assertThat(entry.probeHistory).isEmpty();
+    assertThat(entry.failedProbesInWindow.get()).isEqualTo(0);
+  }
+
+  /** With exactly one unhealthy entry in the pool, that entry and only that entry is evicted. */
+  @Test
+  public void testEviction_selectsUnhealthyChannel() {
+    Entry healthyEntry = createTestEntry();
+    Entry badEntry = createTestEntry();
+    Entry worseEntry = createTestEntry();
+
+    // A channel needs at least 4 probes to be considered for eviction
+    healthyEntry.successfulProbesInWindow.set(10); // 0% failure -> healthy
+    badEntry.failedProbesInWindow.set(3); // 3/13 = 23% failure -> healthy
+    badEntry.successfulProbesInWindow.set(10);
+    worseEntry.failedProbesInWindow.set(10); // 10/10 = 100% failure -> unhealthy
+
+    channelList.addAll(Arrays.asList(healthyEntry, badEntry, worseEntry));
+
+    healthChecker.detectAndRemoveOutlierEntries();
+
+    // Assert that only the unhealthy channel was evicted
+    Mockito.verify(worseEntry.getManagedChannel()).enterIdle();
+    Mockito.verify(badEntry.getManagedChannel(), Mockito.never()).enterIdle();
+    Mockito.verify(healthyEntry.getManagedChannel(), Mockito.never()).enterIdle();
+  }
+
+  /**
+   * With two unhealthy entries (2/3 = 67% of the pool, below the 70% circuit breaker), only the
+   * one with more failed probes is evicted.
+   */
+  @Test
+  public void testEviction_selectsMostUnhealthyChannel() {
+    Entry healthyEntry = createTestEntry();
+    Entry badEntry = createTestEntry();
+    Entry worseEntry = createTestEntry();
+
+    // A channel needs at least 4 probes to be considered for eviction
+    healthyEntry.successfulProbesInWindow.set(10); // 0% failure -> healthy
+    badEntry.failedProbesInWindow.set(8); // 8/13 = 61% failure -> unhealthy
+    // 5 successes, not 10: with 10 the rate is 8/18 = 44% and this entry would count as healthy,
+    // so the test would never exercise choosing between two unhealthy entries.
+    badEntry.successfulProbesInWindow.set(5);
+    worseEntry.failedProbesInWindow.set(10); // 10/10 = 100% failure -> most unhealthy
+
+    channelList.addAll(Arrays.asList(healthyEntry, badEntry, worseEntry));
+
+    healthChecker.detectAndRemoveOutlierEntries();
+
+    // Assert that only the most unhealthy channel was evicted
+    Mockito.verify(worseEntry.getManagedChannel()).enterIdle();
+    Mockito.verify(badEntry.getManagedChannel(), Mockito.never()).enterIdle();
+    Mockito.verify(healthyEntry.getManagedChannel(), Mockito.never()).enterIdle();
+  }
+
+  /** When every entry is unhealthy the circuit breaker blocks all evictions. */
+  @Test
+  public void testCircuitBreaker_preventsEviction() {
+    Entry entry1 = createTestEntry();
+    Entry entry2 = createTestEntry();
+    Entry entry3 = createTestEntry();
+    channelList.addAll(Arrays.asList(entry1, entry2, entry3));
+
+    // Set failure counts to exceed 60% SINGLE_CHANNEL_FAILURE_PERCENT_THRESHOLD with at least
+    // MIN_PROBES_FOR_EVALUATION (4) failures
+    for (Entry entry : channelList) {
+      entry.failedProbesInWindow.set(4); // 4 failures, 0 successes = 100% failure rate
+    }
+
+    healthChecker.detectAndRemoveOutlierEntries();
+
+    // The circuit breaker should engage because 3/3 channels (100%) are unhealthy,
+    // which is greater than the 70% POOLWIDE_BAD_CHANNEL_CIRCUITBREAKER_PERCENT threshold.
+    Mockito.verify(entry1.getManagedChannel(), Mockito.never()).enterIdle();
+    Mockito.verify(entry2.getManagedChannel(), Mockito.never()).enterIdle();
+    Mockito.verify(entry3.getManagedChannel(), Mockito.never()).enterIdle();
+  }
+}
diff --git a/google-cloud-bigtable/src/test/java/com/google/cloud/bigtable/gaxx/grpc/HealthChecker.java b/google-cloud-bigtable/src/test/java/com/google/cloud/bigtable/gaxx/grpc/HealthChecker.java
new file mode 100644
index 0000000000..5e8e00b040
--- /dev/null
+++ b/google-cloud-bigtable/src/test/java/com/google/cloud/bigtable/gaxx/grpc/HealthChecker.java
@@ -0,0 +1,25 @@
+/*
+ * Copyright 2025 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.google.cloud.bigtable.gaxx.grpc;
+
+import com.google.api.core.InternalApi;
+
+/**
+ * Lifecycle contract for a health checker: a start and a stop operation.
+ *
+ * <p>NOTE(review): this file is added under src/test and nothing visible in this change
+ * implements it; ChannelPoolHealthChecker declares matching start()/stop() methods but has no
+ * implements clause. Confirm whether it belongs in src/main.
+ */
+@InternalApi
+public interface HealthChecker {
+  /** Starts health checking. */
+  void start();
+
+  /** Stops health checking. */
+  void stop();
+}