sdk/trace/exporters: add batch span processor exporter #153

Merged
109 changes: 109 additions & 0 deletions opentelemetry-sdk/src/opentelemetry/sdk/trace/export/__init__.py
@@ -13,9 +13,13 @@
# limitations under the License.

import logging
import queue
import threading
import typing
from enum import Enum

from opentelemetry.sdk import util

from .. import Span, SpanProcessor

logger = logging.getLogger(__name__)
@@ -78,6 +82,111 @@ def shutdown(self) -> None:
        self.span_exporter.shutdown()


class BatchExportSpanProcessor(SpanProcessor):
    """Batch span processor implementation.

    BatchExportSpanProcessor is an implementation of `SpanProcessor` that
    batches ended spans and pushes them to the configured `SpanExporter`.
    """

    def __init__(
        self,
        span_exporter: SpanExporter,
        max_queue_size: int = 2048,
        schedule_delay_millis: float = 5000,
        max_export_batch_size: int = 512,
    ):
        if max_queue_size <= 0:
            raise ValueError("max_queue_size must be a positive integer.")

        if schedule_delay_millis <= 0:
            raise ValueError("schedule_delay_millis must be positive.")

        if max_export_batch_size <= 0:
            raise ValueError(
                "max_export_batch_size must be a positive integer."
            )

        if max_export_batch_size > max_queue_size:
            raise ValueError(
                "max_export_batch_size must be less and equal to max_export_batch_size."
Member:
Suggested change:
-                "max_export_batch_size must be less and equal to max_export_batch_size."
+                "max_export_batch_size must be less than or equal to max_export_batch_size."

Also, FWIW, the type annotations don't do anything at runtime; if you want to enforce int/float types here, we need a type check too.

Member (Author):
That check is not strictly needed; I just want a number. If the user passes something else, it'll fail at some point.

            )

        self.span_exporter = span_exporter
        self.queue = queue.Queue(max_queue_size)
Member:
You never call queue.Queue.task_done (https://docs.python.org/3/library/queue.html#queue.Queue.task_done) on the queue. Maybe a collections.deque (https://docs.python.org/3/library/collections.html#collections.deque) would be the better (more lightweight) choice?

"Deques support thread-safe, memory efficient appends and pops"

Member (Author):
I'm not sure it'll work. It doesn't provide a way to access the number of elements in the queue, so an external counter would be needed (and I'm not sure that will work, because deque drops elements without warning).

Member:
It does, just use len(mydeque):

"In addition to the above, deques support iteration, pickling, len(d), reversed(d), copy.copy(d), copy.deepcopy(d), membership testing with the in operator, and subscript references such as d[-1]."

Member (Author):
You're totally right, I need more coffee.

I used it, with two changes:

  • now older spans are dropped (that's the way deque works, it cannot be changed).
  • it is not possible to count the number of dropped spans (we can only guess that spans would be dropped).

Member:
  • now older spans are dropped

It sounds like we need to clarify that in the spec; I actually expected that we'd drop the oldest spans first.

  • it is not possible to count the number of dropped spans

I think it is, if we lock around adding spans to the deque, which we might need to do later anyway.

Member:
If we only consider CPython with its GIL, a plain list with a lock (condition) might actually be the best solution after all. But I expect the deque without locking at every added span to perform significantly better in GIL-less Python (PyPy). By introducing a single lock that is taken on every span.end(), we would effectively reintroduce a sort of GIL (even though we only hold this lock for a short time at once, it would be highly contended).

Member (Author):
I'd like to avoid a lock on adding an element as much as possible, since it would be contended on every span ending. That said, we can look into this implementation in more detail later on; there is plenty of room to improve and discuss.
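A minimal standalone sketch of the deque behavior discussed above (standard library only, not part of the diff): a bounded deque silently discards elements from the opposite end when it is full, and len() works directly.

from collections import deque

# A bounded deque: appending to a full deque silently discards the
# element at the opposite end, here the oldest span.
spans = deque(maxlen=3)
for name in ["a", "b", "c", "d"]:
    spans.append(name)

print(list(spans))  # ['b', 'c', 'd']: "a" (the oldest) was dropped
print(len(spans))   # 3, len() is O(1), no external counter needed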

        self.worker_thread = threading.Thread(target=self.worker, daemon=True)
        self.condition = threading.Condition()
Member:
Suggested change:
-        self.condition = threading.Condition()
+        self.condition = threading.Condition(threading.Lock())

Otherwise, an RLock is created by default, but we don't need one.

Member:
@Oberon00 what's wrong with the default RLock?

Oberon00 (Member), Sep 26, 2019:
We just don't need the additional guarantees that a recursive lock offers. A Lock is at least as efficient as an RLock (after all, a Lock could be implemented as being the same as an RLock, but not the other way round).
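A small sketch of the distinction being discussed (standard library only, not part of the diff):

import threading

# Condition() with no argument creates and wraps a reentrant RLock;
# passing a plain Lock avoids the recursion bookkeeping we don't need.
condition = threading.Condition(threading.Lock())

with condition:              # acquires the underlying Lock
    condition.notify_all()   # wakes any threads blocked in condition.wait()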

        self.schedule_delay_millis = schedule_delay_millis
        self.max_export_batch_size = max_export_batch_size
        self.half_max_queue_size = max_queue_size // 2
        self.done = False

        self.worker_thread.start()

    def on_start(self, span: Span) -> None:
        pass

    def on_end(self, span: Span) -> None:
        try:
Member:
I wonder if we should also check done here and bail out with a warning if it is true.

Member (Author):
I think the specification is not clear about whether onEnd() can be called after shutdown(); anyway, let's be defensive for now and check it.

Member:
Another thing we should maybe do is check threading.Thread.is_alive (https://docs.python.org/3/library/threading.html#threading.Thread.is_alive) for our worker and restart it (logging a warning) if it crashed.

Member (Author):
I am not sure about this one. If that thread can crash, I think we should implement a health check somewhere else. I don't want to make onEnd slower, because it's called for each span.

Member:
Although, about the slowness, we could maybe only check it in the cases where we notify the condition.
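A sketch of the defensive check agreed on above; the class and names here are illustrative stand-ins, since the updated commit is not shown in this hunk:

import logging

logger = logging.getLogger(__name__)

class ToyProcessor:
    """Illustrative stand-in for the processor's shutdown-flag check."""

    def __init__(self):
        self.done = False

    def on_end(self, span) -> None:
        if self.done:  # bail out if shutdown() already ran
            logger.warning("Already shutdown, dropping span.")
            return
        # ... queue the span as the real on_end does ...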

            self.queue.put(span, block=False)
        except queue.Full:
            # TODO: dropped spans counter?
Member:
A dropped-span counter sounds like a plan. Or we could log a warning the first time a span is dropped.

Member (Author):
Good idea; I'd just log the first time a span is dropped. A better approach could be a rate-limited logging system that actually reports the number of spans being dropped per second or so.
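A sketch of the "log on first drop" idea; the helper and its names are hypothetical, not from the PR:

import logging
import queue

logger = logging.getLogger(__name__)

def put_span(q: queue.Queue, span, state: dict) -> None:
    """Enqueue a span, warning only on the first drop (illustrative)."""
    try:
        q.put(span, block=False)
    except queue.Full:
        if not state.get("dropped"):
            logger.warning("Queue is full, spans are being dropped.")
        state["dropped"] = True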

            pass
        if self.queue.qsize() >= self.half_max_queue_size:
Member:
Suggested change:
-        if self.queue.qsize() >= self.half_max_queue_size:
+        if self.queue.qsize() == self.half_max_queue_size:

I think we send too many notifications otherwise.

Member (Author):
I created a variable to avoid this "notification storm". The equality comparison would not work, because the check could miss the threshold (e.g. two spans ending at the same time). Please give me your feedback on the new solution.
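One possible shape of that variable-based fix, sketched under the assumption of a notified flag that the worker resets after draining (the actual commit is not shown here):

import queue
import threading

class ToyBatcher:
    """Illustrative sketch only; `notified` is not the PR's actual field."""

    def __init__(self, max_queue_size: int = 8):
        self.queue = queue.Queue(max_queue_size)
        self.half_max_queue_size = max_queue_size // 2
        self.condition = threading.Condition(threading.Lock())
        self.notified = False

    def on_end(self, span) -> None:
        try:
            self.queue.put(span, block=False)
        except queue.Full:
            pass
        # Notify once per "half full" episode instead of on every span
        # past the threshold; the worker resets `notified` after draining.
        if self.queue.qsize() >= self.half_max_queue_size and not self.notified:
            with self.condition:
                self.notified = True
                self.condition.notify_all()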

        with self.condition:
            self.condition.notify_all()

    def worker(self):
        timeout = self.schedule_delay_millis / 1e3
        while not self.done:
            if self.queue.qsize() < self.max_export_batch_size:
                with self.condition:
                    self.condition.wait(timeout)
                    if self.queue.empty():
                        # spurious notification, let's wait again
                        continue
                    if self.done:
                        # missing spans will be sent when calling flush
                        break

            # subtract the duration of this export call from the next timeout
            start = util.time_ns()
            self.export()
            end = util.time_ns()
            duration = (end - start) / 1e9
            timeout = self.schedule_delay_millis / 1e3 - duration

        # be sure that all spans are sent
        self._flush()
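For clarity, the unit conversions in the worker loop above, with made-up numbers:

schedule_delay_millis = 5000
start, end = 1_000_000_000, 2_200_000_000  # fake util.time_ns() readings
duration = (end - start) / 1e9             # 1.2 s spent exporting
timeout = schedule_delay_millis / 1e3 - duration
print(timeout)                             # 3.8, the next wait is shortened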

    def export(self):
        """Exports at most max_export_batch_size spans."""
        idx = 0
        spans = []
        # currently only a single thread acts as consumer, so queue.get() will
Member:
(comment body not captured)

Member (Author):
I am not sure how I could integrate it; it'd be a big redesign.

        # never block
        while idx < self.max_export_batch_size and not self.queue.empty():
            spans.append(self.queue.get())
            idx += 1
        try:
            self.span_exporter.export(spans)
        # pylint: disable=broad-except
        except Exception as exc:
            logger.warning("Exception while exporting data: %s", exc)

    def _flush(self):
        while not self.queue.empty():
Member:
Instead of checking queue.empty() here again, we could have export() return a bool.
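A sketch of that suggestion (free functions with hypothetical names, not what the PR implements):

import queue

def export_batch(q: queue.Queue, batch_size: int, out: list) -> bool:
    """Drain up to batch_size items; return True if items remain (sketch)."""
    for _ in range(batch_size):
        if q.empty():
            break
        out.append(q.get())
    return not q.empty()

def flush(q: queue.Queue, batch_size: int, out: list) -> None:
    while export_batch(q, batch_size, out):  # loop on the return value
        pass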

            self.export()

    def shutdown(self) -> None:
        # signal the worker thread to finish and then wait for it
        self.done = True
        with self.condition:
            self.condition.notify_all()
        self.worker_thread.join()
        self.span_exporter.shutdown()
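Putting the pieces together, a usage sketch based on the test code in this PR:

from opentelemetry.sdk import trace
from opentelemetry.sdk.trace import export

tracer = trace.Tracer()
span_processor = export.BatchExportSpanProcessor(
    export.ConsoleSpanExporter(), schedule_delay_millis=1000
)
tracer.add_span_processor(span_processor)

with tracer.start_span("example"):
    pass  # ended spans are queued and exported by the worker thread

span_processor.shutdown()  # flushes any remaining spans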


class ConsoleSpanExporter(SpanExporter):
    """Implementation of :class:`SpanExporter` that prints spans to the
    console.
147 changes: 140 additions & 7 deletions opentelemetry-sdk/tests/trace/export/test_export.py
@@ -12,33 +12,166 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import time
import unittest

from opentelemetry.sdk import trace
from opentelemetry.sdk.trace import export


class MySpanExporter(export.SpanExporter):
    """Very simple span exporter used for testing."""

    def __init__(self, destination, max_export_batch_size=None):
        self.destination = destination
        self.max_export_batch_size = max_export_batch_size

    def export(self, spans: trace.Span) -> export.SpanExportResult:
        if (
            self.max_export_batch_size is not None
            and len(spans) > self.max_export_batch_size
        ):
            raise ValueError("Batch is too big")
        self.destination.extend(span.name for span in spans)
        return export.SpanExportResult.SUCCESS


class TestSimpleExportSpanProcessor(unittest.TestCase):
    def test_simple_span_processor(self):
        tracer = trace.Tracer()
Member:
I'd vote to leave the tracer out of these tests and call on_end directly with some mock spans instead.

Member (Author):
I like this idea; it makes them look more like unit tests than integration tests. I updated it.
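A sketch of what such a unit-style test could look like (using unittest.mock as a stand-in span is an assumption; the updated test is not shown in this hunk):

from unittest import mock

spans_names_list = []
my_exporter = MySpanExporter(destination=spans_names_list)
span_processor = export.BatchExportSpanProcessor(my_exporter)

mock_span = mock.Mock()
mock_span.name = "foo"  # the test exporter only reads span.name
span_processor.on_end(mock_span)
span_processor.shutdown()

assert spans_names_list == ["foo"]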


        spans_names_list = []

        my_exporter = MySpanExporter(destination=spans_names_list)
        span_processor = export.SimpleExportSpanProcessor(my_exporter)
        tracer.add_span_processor(span_processor)

        with tracer.start_span("foo"):
            with tracer.start_span("bar"):
                with tracer.start_span("xxx"):
                    pass

        self.assertListEqual(["xxx", "bar", "foo"], spans_names_list)


class TestBatchExportSpanProcessor(unittest.TestCase):
    def test_batch_span_processor(self):
        tracer = trace.Tracer()

        spans_names_list = []

        my_exporter = MySpanExporter(destination=spans_names_list)
        span_processor = export.BatchExportSpanProcessor(my_exporter)
        tracer.add_span_processor(span_processor)

        with tracer.start_span("foo"):
            with tracer.start_span("bar"):
                with tracer.start_span("xxx"):
                    pass

        # call shutdown on specific span processor
        # TODO: this call is missing in the tracer
        span_processor.shutdown()
        self.assertListEqual(["xxx", "bar", "foo"], spans_names_list)

    def test_batch_span_processor_lossless(self):
        """Test that no spans are lost when sending max_queue_size spans"""
        tracer = trace.Tracer()

        spans_names_list = []

        my_exporter = MySpanExporter(
            destination=spans_names_list, max_export_batch_size=128
        )
        span_processor = export.BatchExportSpanProcessor(
            my_exporter, max_queue_size=512, max_export_batch_size=128
        )
        tracer.add_span_processor(span_processor)

        for idx in range(512):
            with tracer.start_span("foo{}".format(idx)):
                pass

        # call shutdown on specific span processor
        # TODO: this call is missing in the tracer
        span_processor.shutdown()
Member:
Since this test and the one above just seem to check _flush, it's probably worth adding a separate check that we only export max_export_batch_size spans at a time during normal operation.

Member (Author):
I'll add it.
        self.assertEqual(len(spans_names_list), 512)

    def test_batch_span_processor_scheduled_delay(self):
        """Test that spans are exported each schedule_delay_millis"""
        tracer = trace.Tracer()

        spans_names_list = []

        my_exporter = MySpanExporter(destination=spans_names_list)
        span_processor = export.BatchExportSpanProcessor(
            my_exporter, schedule_delay_millis=50
        )
        tracer.add_span_processor(span_processor)

        # start single span
        with tracer.start_span("foo1"):
            pass

        time.sleep(0.05 + 0.02)
        # span should be already exported
        self.assertEqual(len(spans_names_list), 1)

        # call shutdown on specific span processor
        # TODO: this call is missing in the tracer
        span_processor.shutdown()

    def test_batch_span_processor_parameters(self):
        # zero max_queue_size
        self.assertRaises(
            ValueError, export.BatchExportSpanProcessor, None, max_queue_size=0
        )

        # negative max_queue_size
        self.assertRaises(
            ValueError,
            export.BatchExportSpanProcessor,
            None,
            max_queue_size=-500,
        )

        # zero schedule_delay_millis
        self.assertRaises(
            ValueError,
            export.BatchExportSpanProcessor,
            None,
            schedule_delay_millis=0,
        )

        # negative schedule_delay_millis
        self.assertRaises(
            ValueError,
            export.BatchExportSpanProcessor,
            None,
            schedule_delay_millis=-500,
        )

        # zero max_export_batch_size
        self.assertRaises(
            ValueError,
            export.BatchExportSpanProcessor,
            None,
            max_export_batch_size=0,
        )

        # negative max_export_batch_size
        self.assertRaises(
            ValueError,
            export.BatchExportSpanProcessor,
            None,
            max_export_batch_size=-500,
        )

        # max_export_batch_size > max_queue_size:
        self.assertRaises(
            ValueError,
            export.BatchExportSpanProcessor,
            None,
            max_queue_size=256,
            max_export_batch_size=512,
        )