Skip to content

Commit

Permalink
Optimize batch span processor
Browse files Browse the repository at this point in the history
Description:
The batch span processor is currently aggressive, in the sense that every new span is sent to the exporter.
This incurs a lot of signaling overhead under heavy load, and overhead from constant polling by the exporter thread
under lighter load. This PR makes the exporter thread wait for maxExportBatchSize spans to avoid busy polling of the queue.

BatchSpanProcessorMultiThreadBenchmark.java result
![image](https://user-images.githubusercontent.com/62265954/111420486-893c7300-86a8-11eb-8f87-feb2f86f00fc.png)

BatchSpanProcessorCpuBenchmark.java result
![image](https://user-images.githubusercontent.com/62265954/111420492-8e012700-86a8-11eb-800e-7de1fbe2c2b1.png)
  • Loading branch information
sbandadd committed Mar 18, 2021
1 parent aac9284 commit 5ac7737
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 21 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,10 @@

import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatThrownBy;
import static org.mockito.Mockito.when;

import io.opentelemetry.api.common.Attributes;
import io.opentelemetry.sdk.common.CompletableResultCode;
import io.opentelemetry.sdk.resources.Resource;
import io.opentelemetry.sdk.trace.SdkTracerProvider;
import io.opentelemetry.sdk.trace.SpanLimits;
Expand All @@ -21,20 +23,29 @@
import java.util.Map;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.TimeUnit;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.Mock;
import org.mockito.junit.jupiter.MockitoExtension;
import org.mockito.junit.jupiter.MockitoSettings;
import org.mockito.quality.Strictness;

// NB: We use AssertJ extracting to reflectively access implementation details to test configuration
// because the use of BatchSpanProcessor makes it difficult to verify values through public means.
@ExtendWith(MockitoExtension.class)
@MockitoSettings(strictness = Strictness.LENIENT)
class TracerProviderConfigurationTest {

private static final ConfigProperties EMPTY =
ConfigProperties.createForTest(Collections.emptyMap());

@Mock private SpanExporter exporter;
@Mock private SpanExporter mockSpanExporter;

@BeforeEach
void setUp() {
  // Stub shutdown() on the mock exporter so it hands back an already-successful result instead of
  // Mockito's default null. NOTE(review): presumably needed because the tests call
  // processor.shutdown(), which is expected to delegate to the exporter -- confirm.
  CompletableResultCode successfulShutdown = CompletableResultCode.ofSuccess();
  when(mockSpanExporter.shutdown()).thenReturn(successfulShutdown);
}

@Test
void configureTracerProvider() {
Expand Down Expand Up @@ -69,7 +80,7 @@ void configureTracerProvider() {
@Test
void configureSpanProcessor_empty() {
BatchSpanProcessor processor =
TracerProviderConfiguration.configureSpanProcessor(EMPTY, exporter);
TracerProviderConfiguration.configureSpanProcessor(EMPTY, mockSpanExporter);

try {
assertThat(processor)
Expand All @@ -88,7 +99,7 @@ void configureSpanProcessor_empty() {
.isInstanceOfSatisfying(
ArrayBlockingQueue.class,
queue -> assertThat(queue.remainingCapacity()).isEqualTo(2048));
assertThat(worker).extracting("spanExporter").isEqualTo(exporter);
assertThat(worker).extracting("spanExporter").isEqualTo(mockSpanExporter);
});
} finally {
processor.shutdown();
Expand All @@ -105,7 +116,7 @@ void configureSpanProcessor_configured() {

BatchSpanProcessor processor =
TracerProviderConfiguration.configureSpanProcessor(
ConfigProperties.createForTest(properties), exporter);
ConfigProperties.createForTest(properties), mockSpanExporter);

try {
assertThat(processor)
Expand All @@ -124,7 +135,7 @@ void configureSpanProcessor_configured() {
.isInstanceOfSatisfying(
ArrayBlockingQueue.class,
queue -> assertThat(queue.remainingCapacity()).isEqualTo(2));
assertThat(worker).extracting("spanExporter").isEqualTo(exporter);
assertThat(worker).extracting("spanExporter").isEqualTo(mockSpanExporter);
});
} finally {
processor.shutdown();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -130,9 +130,9 @@ private static final class Worker implements Runnable {
private final long exporterTimeoutNanos;

private long nextExportTime;

private final BlockingQueue<ReadableSpan> queue;

private final AtomicBoolean needSignal = new AtomicBoolean(false);
private final BlockingQueue<Boolean> signal;
private final AtomicReference<CompletableResultCode> flushRequested = new AtomicReference<>();
private volatile boolean continueWork = true;
private final ArrayList<SpanData> batch;
Expand All @@ -148,6 +148,7 @@ private Worker(
this.maxExportBatchSize = maxExportBatchSize;
this.exporterTimeoutNanos = exporterTimeoutNanos;
this.queue = queue;
this.signal = new ArrayBlockingQueue<>(1);
Meter meter = GlobalMetricsProvider.getMeter("io.opentelemetry.sdk.trace");
meter
.longValueObserverBuilder("queueSize")
Expand Down Expand Up @@ -180,6 +181,10 @@ private Worker(
/**
 * Enqueues a finished span for export.
 *
 * <p>If the queue rejects the span, the dropped-span counter is incremented by one and nothing
 * else happens. Otherwise, once the queue holds at least {@code maxExportBatchSize} spans and the
 * worker has flagged (via {@code needSignal}) that it is waiting, a token is offered to {@code
 * signal} to wake it up.
 *
 * @param span the span to enqueue
 */
private void addSpan(ReadableSpan span) {
  boolean enqueued = queue.offer(span);
  if (!enqueued) {
    droppedSpans.add(1);
    return;
  }
  // Only signal when a full batch is available and the worker asked to be woken; offer() is
  // non-blocking, so a token that is already pending is simply left in place.
  boolean fullBatchAvailable = queue.size() >= maxExportBatchSize;
  if (fullBatchAvailable && needSignal.get()) {
    signal.offer(true);
  }
}

Expand All @@ -191,21 +196,26 @@ public void run() {
if (flushRequested.get() != null) {
flush();
}

try {
ReadableSpan lastElement = queue.poll(100, TimeUnit.MILLISECONDS);
if (lastElement != null) {
batch.add(lastElement.toSpanData());
}
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
return;
while (!queue.isEmpty() && batch.size() < maxExportBatchSize) {
batch.add(queue.poll().toSpanData());
}

if (batch.size() >= maxExportBatchSize || System.nanoTime() >= nextExportTime) {
exportCurrentBatch();
updateNextExportTime();
}
if (queue.isEmpty()) {
try {
long pollWaitTime = nextExportTime - System.nanoTime();
if (pollWaitTime > 0) {
needSignal.set(true);
signal.poll(pollWaitTime, TimeUnit.NANOSECONDS);
needSignal.set(false);
}
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
return;
}
}
}
}

Expand Down Expand Up @@ -252,8 +262,10 @@ private CompletableResultCode shutdown() {

private CompletableResultCode forceFlush() {
CompletableResultCode flushResult = new CompletableResultCode();
// we set the atomic here to trigger the worker loop to do a flush on its next iteration.
flushRequested.compareAndSet(null, flushResult);
// we set the atomic here to trigger the worker loop to do a flush of the entire queue.
if (flushRequested.compareAndSet(null, flushResult)) {
signal.offer(true);
}
CompletableResultCode possibleResult = flushRequested.get();
// there's a race here where the flush happening in the worker loop could complete before we
// get what's in the atomic. In that case, just return success, since we know it succeeded in
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -197,12 +197,19 @@ void forceExport() {
.build();

sdkTracerProvider = SdkTracerProvider.builder().addSpanProcessor(batchSpanProcessor).build();
for (int i = 0; i < 100; i++) {
for (int i = 0; i < 50; i++) {
createEndedSpan("notExported");
}
List<SpanData> exported = waitingSpanExporter.waitForExport();
assertThat(exported).isNotNull();
assertThat(exported.size()).isEqualTo(98);
assertThat(exported.size()).isEqualTo(49);

for (int i = 0; i < 50; i++) {
createEndedSpan("notExported");
}
exported = waitingSpanExporter.waitForExport();
assertThat(exported).isNotNull();
assertThat(exported.size()).isEqualTo(49);

batchSpanProcessor.forceFlush().join(10, TimeUnit.SECONDS);
exported = waitingSpanExporter.getExported();
Expand Down

0 comments on commit 5ac7737

Please sign in to comment.