Skip to content

Commit

Permalink
Optimize batch span processor
Browse files Browse the repository at this point in the history
Description:
Batch span processor currently is aggressive in the sense that any new spans are sent to the exporter,
this involves lots of overhead from signalling under heavy load and overhead from constant polling by exporter thread
under less load.

This approach uses a multi producer and single consumer bounded concurrent queue. Worker threads add the spans to the queue and signal the exporter when the queue size reaches maxExportBatchSize, exporter thread copies the queue into an array and initiates IO.

![image](https://user-images.githubusercontent.com/62265954/110030822-03551b00-7ceb-11eb-89e8-4c5107fe434e.png)

![image](https://user-images.githubusercontent.com/62265954/110030844-094afc00-7ceb-11eb-8973-886f9982e8fe.png)

Context and more benchmarking results are in #2968.
  • Loading branch information
sbandadd committed Mar 15, 2021
1 parent 0d687bd commit c102d8f
Show file tree
Hide file tree
Showing 3 changed files with 92 additions and 46 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,10 @@

import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatThrownBy;
import static org.mockito.Mockito.when;

import io.opentelemetry.api.common.Attributes;
import io.opentelemetry.sdk.common.CompletableResultCode;
import io.opentelemetry.sdk.resources.Resource;
import io.opentelemetry.sdk.trace.SdkTracerProvider;
import io.opentelemetry.sdk.trace.SpanLimits;
Expand All @@ -19,22 +21,30 @@
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.TimeUnit;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.Mock;
import org.mockito.junit.jupiter.MockitoExtension;
import org.mockito.junit.jupiter.MockitoSettings;
import org.mockito.quality.Strictness;

// NB: We use AssertJ extracting to reflectively access implementation details to test configuration
// because the use of BatchSpanProcessor makes it difficult to verify values through public means.
@ExtendWith(MockitoExtension.class)
@MockitoSettings(strictness = Strictness.LENIENT)
class TracerProviderConfigurationTest {

private static final ConfigProperties EMPTY =
ConfigProperties.createForTest(Collections.emptyMap());

@Mock private SpanExporter exporter;
@Mock private SpanExporter mockSpanExporter;

@BeforeEach
void setUp() {
when(mockSpanExporter.shutdown()).thenReturn(CompletableResultCode.ofSuccess());
}

@Test
void configureTracerProvider() {
Expand Down Expand Up @@ -69,7 +79,7 @@ void configureTracerProvider() {
@Test
void configureSpanProcessor_empty() {
BatchSpanProcessor processor =
TracerProviderConfiguration.configureSpanProcessor(EMPTY, exporter);
TracerProviderConfiguration.configureSpanProcessor(EMPTY, mockSpanExporter);

try {
assertThat(processor)
Expand All @@ -83,12 +93,8 @@ void configureSpanProcessor_empty() {
.extracting("exporterTimeoutNanos")
.isEqualTo(TimeUnit.MILLISECONDS.toNanos(30000));
assertThat(worker).extracting("maxExportBatchSize").isEqualTo(512);
assertThat(worker)
.extracting("queue")
.isInstanceOfSatisfying(
ArrayBlockingQueue.class,
queue -> assertThat(queue.remainingCapacity()).isEqualTo(2048));
assertThat(worker).extracting("spanExporter").isEqualTo(exporter);
assertThat(worker).extracting("maxQueueSize").isEqualTo(2048);
assertThat(worker).extracting("spanExporter").isEqualTo(mockSpanExporter);
});
} finally {
processor.shutdown();
Expand All @@ -105,7 +111,7 @@ void configureSpanProcessor_configured() {

BatchSpanProcessor processor =
TracerProviderConfiguration.configureSpanProcessor(
ConfigProperties.createForTest(properties), exporter);
ConfigProperties.createForTest(properties), mockSpanExporter);

try {
assertThat(processor)
Expand All @@ -119,12 +125,8 @@ void configureSpanProcessor_configured() {
.extracting("exporterTimeoutNanos")
.isEqualTo(TimeUnit.MILLISECONDS.toNanos(4));
assertThat(worker).extracting("maxExportBatchSize").isEqualTo(3);
assertThat(worker)
.extracting("queue")
.isInstanceOfSatisfying(
ArrayBlockingQueue.class,
queue -> assertThat(queue.remainingCapacity()).isEqualTo(2));
assertThat(worker).extracting("spanExporter").isEqualTo(exporter);
assertThat(worker).extracting("maxQueueSize").isEqualTo(2);
assertThat(worker).extracting("spanExporter").isEqualTo(mockSpanExporter);
});
} finally {
processor.shutdown();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,11 @@
import java.util.Collections;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.AtomicReference;
import java.util.logging.Level;
import java.util.logging.Logger;
Expand Down Expand Up @@ -72,7 +75,8 @@ public static BatchSpanProcessorBuilder builder(SpanExporter spanExporter) {
scheduleDelayNanos,
maxExportBatchSize,
exporterTimeoutNanos,
new ArrayBlockingQueue<>(maxQueueSize));
new ConcurrentReadableSpanQueue(),
maxQueueSize);
Thread workerThread = new DaemonThreadFactory(WORKER_THREAD_NAME).newThread(worker);
workerThread.start();
}
Expand Down Expand Up @@ -113,7 +117,32 @@ public CompletableResultCode forceFlush() {

// Visible for testing
ArrayList<SpanData> getBatch() {
return worker.batch;
return new ArrayList<>(worker.batch);
}

// Helper class to workaround the linear complexity of size() function in ConcurrentLinkedQueue.
private static final class ConcurrentReadableSpanQueue {
private final ConcurrentLinkedQueue<ReadableSpan> queue = new ConcurrentLinkedQueue<>();
private final AtomicInteger size = new AtomicInteger(0);

public boolean isEmpty() {
return queue.isEmpty();
}

public boolean offer(ReadableSpan span) {
queue.offer(span);
size.incrementAndGet();
return true;
}

public ReadableSpan poll() {
size.decrementAndGet();
return queue.poll();
}

public int size() {
return size.get();
}
}

// Worker is a thread that batches multiple spans and calls the registered SpanExporter to export
Expand All @@ -128,26 +157,29 @@ private static final class Worker implements Runnable {
private final long scheduleDelayNanos;
private final int maxExportBatchSize;
private final long exporterTimeoutNanos;

private long nextExportTime;

private final BlockingQueue<ReadableSpan> queue;

private final ConcurrentReadableSpanQueue queue;
private final int maxQueueSize;
private final AtomicLong addedSpansCounter = new AtomicLong(0);
private final AtomicReference<CompletableResultCode> flushRequested = new AtomicReference<>();
private volatile boolean continueWork = true;
private final ArrayList<SpanData> batch;
private final BlockingQueue<Boolean> signal;

private Worker(
SpanExporter spanExporter,
long scheduleDelayNanos,
int maxExportBatchSize,
long exporterTimeoutNanos,
BlockingQueue<ReadableSpan> queue) {
ConcurrentReadableSpanQueue queue,
int maxQueueSize) {
this.spanExporter = spanExporter;
this.scheduleDelayNanos = scheduleDelayNanos;
this.maxExportBatchSize = maxExportBatchSize;
this.exporterTimeoutNanos = exporterTimeoutNanos;
this.queue = queue;
this.maxQueueSize = maxQueueSize;
this.signal = new ArrayBlockingQueue<Boolean>(1);
Meter meter = GlobalMetricsProvider.getMeter("io.opentelemetry.sdk.trace");
meter
.longValueObserverBuilder("queueSize")
Expand All @@ -156,7 +188,7 @@ private Worker(
.setUpdater(
result ->
result.observe(
queue.size(),
queue.size.get(),
Labels.of(SPAN_PROCESSOR_TYPE_LABEL, SPAN_PROCESSOR_TYPE_VALUE)))
.build();
LongCounter processedSpansCounter =
Expand All @@ -178,44 +210,47 @@ private Worker(
}

private void addSpan(ReadableSpan span) {
if (!queue.offer(span)) {
if (queue.size() >= maxQueueSize) {
droppedSpans.add(1);
} else {
queue.offer(span);
if (addedSpansCounter.incrementAndGet() % maxExportBatchSize == 0) {
signal.offer(true);
}
}
}

@Override
public void run() {
updateNextExportTime();

while (continueWork) {
if (flushRequested.get() != null) {
flush();
}

try {
ReadableSpan lastElement = queue.poll(100, TimeUnit.MILLISECONDS);
if (lastElement != null) {
batch.add(lastElement.toSpanData());
}
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
return;
while (!queue.isEmpty() && batch.size() < maxExportBatchSize) {
batch.add(queue.poll().toSpanData());
}

if (batch.size() >= maxExportBatchSize || System.nanoTime() >= nextExportTime) {
exportCurrentBatch();
updateNextExportTime();
}
if (queue.isEmpty()) {
try {
long pollWaitTime = nextExportTime - System.nanoTime();
if (pollWaitTime > 0) {
signal.poll(pollWaitTime, TimeUnit.NANOSECONDS);
}
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
return;
}
}
}
}

private void flush() {
int spansToFlush = queue.size();
while (spansToFlush > 0) {
ReadableSpan span = queue.poll();
assert span != null;
batch.add(span.toSpanData());
spansToFlush--;
while (!queue.isEmpty()) {
batch.add(queue.poll().toSpanData());
if (batch.size() >= maxExportBatchSize) {
exportCurrentBatch();
}
Expand Down Expand Up @@ -252,8 +287,10 @@ private CompletableResultCode shutdown() {

private CompletableResultCode forceFlush() {
CompletableResultCode flushResult = new CompletableResultCode();
// we set the atomic here to trigger the worker loop to do a flush on its next iteration.
flushRequested.compareAndSet(null, flushResult);
// we set the atomic here to trigger the worker loop to do a flush of the entire queue.
if (flushRequested.compareAndSet(null, flushResult)) {
signal.offer(true);
}
CompletableResultCode possibleResult = flushRequested.get();
// there's a race here where the flush happening in the worker loop could complete before we
// get what's in the atomic. In that case, just return success, since we know it succeeded in
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -197,12 +197,19 @@ void forceExport() {
.build();

sdkTracerProvider = SdkTracerProvider.builder().addSpanProcessor(batchSpanProcessor).build();
for (int i = 0; i < 100; i++) {
for (int i = 0; i < 50; i++) {
createEndedSpan("notExported");
}
List<SpanData> exported = waitingSpanExporter.waitForExport();
assertThat(exported).isNotNull();
assertThat(exported.size()).isEqualTo(98);
assertThat(exported.size()).isEqualTo(49);

for (int i = 0; i < 50; i++) {
createEndedSpan("notExported");
}
exported = waitingSpanExporter.waitForExport();
assertThat(exported).isNotNull();
assertThat(exported.size()).isEqualTo(49);

batchSpanProcessor.forceFlush().join(10, TimeUnit.SECONDS);
exported = waitingSpanExporter.getExported();
Expand Down

0 comments on commit c102d8f

Please sign in to comment.