-
Notifications
You must be signed in to change notification settings - Fork 143
/
Copy pathrecorder.py
622 lines (519 loc) · 21.7 KB
/
recorder.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
import copy
import json
import logging
import os
import platform
import time
from aws_xray_sdk import global_sdk_config
from aws_xray_sdk.version import VERSION
from .models.segment import Segment, SegmentContextManager
from .models.subsegment import Subsegment, SubsegmentContextManager
from .models.default_dynamic_naming import DefaultDynamicNaming
from .models.dummy_entities import DummySegment, DummySubsegment
from .emitters.udp_emitter import UDPEmitter
from .streaming.default_streaming import DefaultStreaming
from .context import Context
from .daemon_config import DaemonConfig
from .plugins.utils import get_plugin_modules
from .lambda_launcher import check_in_lambda
from .exceptions.exceptions import SegmentNameMissingException, SegmentNotFoundException
from .utils import stacktrace
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Names of the environment variables the recorder reads via ``os.getenv``:
# the default segment name, the daemon address and the context-missing
# strategy respectively.
TRACING_NAME_KEY = 'AWS_XRAY_TRACING_NAME'
DAEMON_ADDR_KEY = 'AWS_XRAY_DAEMON_ADDRESS'
CONTEXT_MISSING_KEY = 'AWS_XRAY_CONTEXT_MISSING'

# Base AWS metadata identifying this SDK and its version. The recorder
# works on deep copies of this mapping, never on the mapping itself.
XRAY_META = {
    'xray': {
        'sdk': 'X-Ray for Python',
        'sdk_version': VERSION,
    },
}

# Interpreter implementation and version, computed once at import time and
# attached to segments as their service information.
SERVICE_INFO = {
    'runtime': platform.python_implementation(),
    'runtime_version': platform.python_version(),
}
class AWSXRayRecorder:
    """
    A global AWS X-Ray recorder that will begin/end segments/subsegments
    and send them to the X-Ray daemon. This recorder is initialized during
    loading time so you can use::

        from aws_xray_sdk.core import xray_recorder

    in your module to access it
    """

    # Class-level default so that reading ``enabled`` never raises
    # ``AttributeError``: nothing else in this class assigns ``_enabled``
    # before the property setter is used.
    _enabled = True

    def __init__(self):
        """
        Build a recorder with default streaming, context, sampler and
        emitter. When ``check_in_lambda()`` returns a context, that context
        and a ``LocalSampler`` are used and the streaming threshold is set
        to 0; otherwise a plain ``Context`` and a ``DefaultSampler`` are used.
        """
        self._streaming = DefaultStreaming()

        context = check_in_lambda()
        if context:
            # Special handling when running on AWS Lambda.
            from .sampling.local.sampler import LocalSampler
            self._context = context
            self.streaming_threshold = 0
            self._sampler = LocalSampler()
        else:
            from .sampling.sampler import DefaultSampler
            self._context = Context()
            self._sampler = DefaultSampler()

        self._emitter = UDPEmitter()
        self._sampling = True
        self._max_trace_back = 10
        self._plugins = None
        self._service = os.getenv(TRACING_NAME_KEY)
        self._dynamic_naming = None
        self._aws_metadata = copy.deepcopy(XRAY_META)
        self._origin = None
        self._stream_sql = True

        # Compare by class name because ``DefaultSampler`` is only imported
        # in the non-Lambda branch above.
        if type(self.sampler).__name__ == 'DefaultSampler':
            self.sampler.load_settings(DaemonConfig(), self.context)

    def configure(self, sampling=None, plugins=None,
                  context_missing=None, sampling_rules=None,
                  daemon_address=None, service=None,
                  context=None, emitter=None, streaming=None,
                  dynamic_naming=None, streaming_threshold=None,
                  max_trace_back=None, sampler=None,
                  stream_sql=True):
        """Configure global X-Ray recorder.

        Configure needs to run before patching third party libraries
        to avoid creating dangling subsegment.

        :param bool sampling: If sampling is enabled, every time the recorder
            creates a segment it decides whether to send this segment to
            the X-Ray daemon. This setting is not used if the recorder
            is running in AWS Lambda. The recorder always respect the incoming
            sampling decisions regardless of this setting.
        :param sampling_rules: Pass a set of local custom sampling rules.
            Can be an absolute path of the sampling rule config json file
            or a dictionary that defines those rules. This will also be the
            fallback rules in case of centralized sampling opted-in while
            the centralized sampling rules are not available.
        :param sampler: The sampler used to make sampling decisions. The SDK
            provides two built-in samplers. One is centralized rules based and
            the other is local rules based. The former is the default.
        :param tuple plugins: plugins that add extra metadata to each segment.
            Currently available plugins are EC2Plugin, ECS plugin and
            ElasticBeanstalkPlugin.
            If you want to disable all previously enabled plugins,
            pass an empty tuple ``()``.
        :param str context_missing: recorder behavior when it tries to mutate
            a segment or add a subsegment but there is no active segment.
            RUNTIME_ERROR means the recorder will raise an exception.
            LOG_ERROR means the recorder will only log the error and
            do nothing.
            IGNORE_ERROR means the recorder will do nothing
        :param str daemon_address: The X-Ray daemon address where the recorder
            sends data to.
        :param str service: default segment name if creating a segment without
            providing a name.
        :param context: You can pass your own implementation of context storage
            for active segment/subsegment by overriding the default
            ``Context`` class.
        :param emitter: The emitter that sends a segment/subsegment to
            the X-Ray daemon. You can override ``UDPEmitter`` class.
        :param dynamic_naming: a string that defines a pattern that host names
            should match. Alternatively you can pass a module which
            overrides ``DefaultDynamicNaming`` module.
        :param streaming: The streaming module to stream out trace documents
            when they grow too large. You can override ``DefaultStreaming``
            class to have your own implementation of the streaming process.
        :param streaming_threshold: If this threshold is exceeded within a
            single segment, the recorder will start streaming out children
            subsegments. By default it is the maximum number of subsegments
            within a segment.
        :param int max_trace_back: The maximum number of stack traces recorded
            by auto-capture. Lower this if a single document becomes too large.
            Values that are not a non-negative ``int`` are ignored.
        :param bool stream_sql: Whether SQL query texts should be streamed.

        Environment variables AWS_XRAY_DAEMON_ADDRESS, AWS_XRAY_CONTEXT_MISSING
        and AWS_XRAY_TRACING_NAME respectively overrides arguments
        daemon_address, context_missing and service. The environment value
        is only consulted when the corresponding argument is passed.
        """
        if sampling is not None:
            self.sampling = sampling
        if sampler:
            self.sampler = sampler
        if service:
            self.service = os.getenv(TRACING_NAME_KEY, service)
        if sampling_rules:
            self._load_sampling_rules(sampling_rules)
        if emitter:
            self.emitter = emitter
        if daemon_address:
            self.emitter.set_daemon_address(os.getenv(DAEMON_ADDR_KEY, daemon_address))
        if context:
            self.context = context
        if context_missing:
            self.context.context_missing = os.getenv(CONTEXT_MISSING_KEY, context_missing)
        if dynamic_naming:
            self.dynamic_naming = dynamic_naming
        if streaming:
            self.streaming = streaming
        if streaming_threshold is not None:
            self.streaming_threshold = streaming_threshold
        # Exact type check on purpose: ``bool`` is a subclass of ``int`` and
        # must not be accepted as a trace-back depth.
        if type(max_trace_back) is int and max_trace_back >= 0:
            self.max_trace_back = max_trace_back
        # NOTE(review): because the parameter defaults to ``True``, any call
        # to ``configure()`` that omits ``stream_sql`` resets it to ``True``.
        # Confirm whether a ``None`` default was intended.
        if stream_sql is not None:
            self.stream_sql = stream_sql

        if plugins:
            plugin_modules = get_plugin_modules(plugins)
            for plugin in plugin_modules:
                plugin.initialize()
                if plugin.runtime_context:
                    self._aws_metadata[plugin.SERVICE_NAME] = plugin.runtime_context
                    self._origin = plugin.ORIGIN
        elif plugins is not None:
            # handling explicitly using empty list to clean up plugins.
            self._aws_metadata = copy.deepcopy(XRAY_META)
            self._origin = None

        if type(self.sampler).__name__ == 'DefaultSampler':
            self.sampler.load_settings(DaemonConfig(daemon_address),
                                       self.context, self._origin)

    def in_segment(self, name=None, **segment_kwargs):
        """
        Return a segment context manager.

        :param str name: the name of the segment
        :param dict segment_kwargs: remaining arguments passed directly to `begin_segment`
        """
        return SegmentContextManager(self, name=name, **segment_kwargs)

    def in_subsegment(self, name=None, **subsegment_kwargs):
        """
        Return a subsegment context manager.

        :param str name: the name of the subsegment
        :param dict subsegment_kwargs: remaining arguments passed directly to `begin_subsegment`
        """
        return SubsegmentContextManager(self, name=name, **subsegment_kwargs)

    def begin_segment(self, name=None, traceid=None,
                      parent_id=None, sampling=None):
        """
        Begin a segment on the current thread and return it. The recorder
        only keeps one segment at a time. Create the second one without
        closing existing one will overwrite it.

        :param str name: the name of the segment
        :param str traceid: trace id of the segment
        :param str parent_id: forwarded to ``Segment`` as ``parent_id``
        :param int sampling: 0 means not sampled, 1 means sampled
        :raises SegmentNameMissingException: if neither ``name`` nor the
            recorder's ``service`` provides a segment name.
        """
        # Disable the recorder; return a generated dummy segment.
        if not global_sdk_config.sdk_enabled():
            return DummySegment(global_sdk_config.DISABLED_ENTITY_NAME)

        seg_name = name or self.service
        if not seg_name:
            raise SegmentNameMissingException("Segment name is required.")

        # Sampling decision is None if not sampled.
        # In a sampled case it could be either a string or 1
        # depending on if centralized or local sampling rule takes effect.
        decision = True

        # we respect the input sampling decision
        # regardless of recorder configuration.
        if sampling == 0:
            decision = False
        elif sampling:
            decision = sampling
        elif self.sampling:
            decision = self._sampler.should_trace({'service': seg_name})

        if not decision:
            segment = DummySegment(seg_name)
        else:
            segment = Segment(name=seg_name, traceid=traceid,
                              parent_id=parent_id)
            self._populate_runtime_context(segment, decision)

        self.context.put_segment(segment)
        return segment

    def end_segment(self, end_time=None):
        """
        End the current segment and send it to X-Ray daemon
        if it is ready to send. Ready means segment and
        all its subsegments are closed.

        :param float end_time: segment completion in unix epoch in seconds.
        """
        # When the SDK is disabled we return
        if not global_sdk_config.sdk_enabled():
            return

        self.context.end_segment(end_time)
        segment = self.current_segment()
        if segment and segment.ready_to_send():
            self._send_segment()

    def current_segment(self):
        """
        Return the currently active segment. In a multithreading environment,
        this will make sure the segment returned is the one created by the
        same thread.
        """
        entity = self.get_trace_entity()
        if self._is_subsegment(entity):
            return entity.parent_segment
        return entity

    def _begin_subsegment_helper(self, name, namespace='local', beginWithoutSampling=False):
        """
        Helper method to begin_subsegment and begin_subsegment_without_sampling

        :param str name: the name of the subsegment.
        :param str namespace: namespace passed to ``Subsegment``.
        :param bool beginWithoutSampling: when true, a ``DummySubsegment`` is
            created even if the current entity is sampled.
        :return: the new subsegment, or ``None`` if there is no current
            segment.
        """
        # Generating the parent dummy segment is necessary.
        # We don't need to store anything in context. Assumption here
        # is that we only work with recorder-level APIs.
        if not global_sdk_config.sdk_enabled():
            return DummySubsegment(DummySegment(global_sdk_config.DISABLED_ENTITY_NAME))

        segment = self.current_segment()
        if not segment:
            # Lazy logger arguments: the message is only formatted when the
            # record is actually emitted.
            log.warning("No segment found, cannot begin subsegment %s.", name)
            return None

        current_entity = self.get_trace_entity()
        if not current_entity.sampled or beginWithoutSampling:
            subsegment = DummySubsegment(segment, name)
        else:
            subsegment = Subsegment(name, namespace, segment)

        self.context.put_subsegment(subsegment)
        return subsegment

    def begin_subsegment(self, name, namespace='local'):
        """
        Begin a new subsegment.
        If there is open subsegment, the newly created subsegment will be the
        child of latest opened subsegment.
        If not, it will be the child of the current open segment.

        :param str name: the name of the subsegment.
        :param str namespace: currently can only be 'local', 'remote', 'aws'.
        """
        return self._begin_subsegment_helper(name, namespace)

    def begin_subsegment_without_sampling(self, name):
        """
        Begin a new unsampled subsegment.
        If there is open subsegment, the newly created subsegment will be the
        child of latest opened subsegment.
        If not, it will be the child of the current open segment.

        :param str name: the name of the subsegment.
        """
        return self._begin_subsegment_helper(name, beginWithoutSampling=True)

    def current_subsegment(self):
        """
        Return the latest opened subsegment. In a multithreading environment,
        this will make sure the subsegment returned is one created
        by the same thread. Returns ``None`` when the current trace entity
        is not a subsegment.
        """
        if not global_sdk_config.sdk_enabled():
            return DummySubsegment(DummySegment(global_sdk_config.DISABLED_ENTITY_NAME))

        entity = self.get_trace_entity()
        if self._is_subsegment(entity):
            return entity
        return None

    def end_subsegment(self, end_time=None):
        """
        End the current active subsegment. If this is the last one open
        under its parent segment, the entire segment will be sent.

        :param float end_time: subsegment completion in unix epoch in seconds.
        """
        if not global_sdk_config.sdk_enabled():
            return

        if not self.context.end_subsegment(end_time):
            return

        # if segment is already close, we check if we can send entire segment
        # otherwise we check if we need to stream some subsegments
        if self.current_segment().ready_to_send():
            self._send_segment()
        else:
            self.stream_subsegments()

    def put_annotation(self, key, value):
        """
        Annotate current active trace entity with a key-value pair.
        Annotations will be indexed for later search query.

        :param str key: annotation key
        :param object value: annotation value. Any type other than
            string/number/bool will be dropped
        """
        if not global_sdk_config.sdk_enabled():
            return
        entity = self.get_trace_entity()
        if entity and entity.sampled:
            entity.put_annotation(key, value)

    def put_metadata(self, key, value, namespace='default'):
        """
        Add metadata to the current active trace entity.
        Metadata is not indexed but can be later retrieved
        by BatchGetTraces API.

        :param str namespace: optional. Default namespace is `default`.
            It must be a string and prefix `AWS.` is reserved.
        :param str key: metadata key under specified namespace
        :param object value: any object that can be serialized into JSON string
        """
        if not global_sdk_config.sdk_enabled():
            return
        entity = self.get_trace_entity()
        if entity and entity.sampled:
            entity.put_metadata(key, value, namespace)

    def is_sampled(self):
        """
        Check if the current trace entity is sampled or not.
        Return `False` if no active entity found.
        """
        if not global_sdk_config.sdk_enabled():
            # Disabled SDK is never sampled
            return False
        entity = self.get_trace_entity()
        if entity:
            return entity.sampled
        return False

    def get_trace_entity(self):
        """
        A pass through method to ``context.get_trace_entity()``.
        """
        return self.context.get_trace_entity()

    def set_trace_entity(self, trace_entity):
        """
        A pass through method to ``context.set_trace_entity()``.
        """
        self.context.set_trace_entity(trace_entity)

    def clear_trace_entities(self):
        """
        A pass through method to ``context.clear_trace_entities()``.
        """
        self.context.clear_trace_entities()

    def stream_subsegments(self):
        """
        Stream all closed subsegments to the daemon
        and remove reference to the parent segment.
        No-op for a not sampled segment.
        """
        segment = self.current_segment()

        if self.streaming.is_eligible(segment):
            self.streaming.stream(segment, self._stream_subsegment_out)

    def capture(self, name=None):
        """
        A decorator that records enclosed function in a subsegment.
        It only works with synchronous functions.

        :param str name: The name of the subsegment. If not specified
            the function name will be used.
        """
        return self.in_subsegment(name=name)

    def record_subsegment(self, wrapped, instance, args, kwargs, name,
                          namespace, meta_processor):
        """
        Call ``wrapped(*args, **kwargs)`` inside a new subsegment and return
        its result; any exception it raises is recorded and re-raised.

        :param wrapped: the callable to invoke.
        :param instance: passed through to ``meta_processor`` only.
        :param args: positional arguments for ``wrapped``.
        :param kwargs: keyword arguments for ``wrapped``.
        :param str name: the name of the subsegment.
        :param str namespace: the namespace of the subsegment.
        :param meta_processor: if callable, it is invoked with the call
            details (including return value, exception and stack trace)
            instead of the default exception recording.
        """
        subsegment = self.begin_subsegment(name, namespace)

        exception = None
        stack = None
        return_value = None

        try:
            return_value = wrapped(*args, **kwargs)
            return return_value
        except Exception as e:
            exception = e
            stack = stacktrace.get_stacktrace(limit=self.max_trace_back)
            raise
        finally:
            # No-op if subsegment is `None` due to `LOG_ERROR`.
            if subsegment is not None:
                # Capture the end time before post-processing so the
                # processor's own runtime is not counted.
                end_time = time.time()
                if callable(meta_processor):
                    meta_processor(
                        wrapped=wrapped,
                        instance=instance,
                        args=args,
                        kwargs=kwargs,
                        return_value=return_value,
                        exception=exception,
                        subsegment=subsegment,
                        stack=stack,
                    )
                elif exception:
                    subsegment.add_exception(exception, stack)

                self.end_subsegment(end_time)

    def _populate_runtime_context(self, segment, sampling_decision):
        """
        Attach origin (if any), a copy of the AWS metadata and the service
        info to ``segment``. A string ``sampling_decision`` is recorded as
        the segment's rule name.
        """
        if self._origin:
            segment.origin = self._origin

        # Deep copy so per-segment changes cannot leak into the recorder.
        segment.set_aws(copy.deepcopy(self._aws_metadata))
        segment.set_service(SERVICE_INFO)

        if isinstance(sampling_decision, str):
            segment.set_rule_name(sampling_decision)

    def _send_segment(self):
        """
        Send the current segment to X-Ray daemon if it is present and
        sampled, then clean up context storage.
        The emitter will handle failures.
        """
        segment = self.current_segment()

        if not segment:
            return

        if segment.sampled:
            self.emitter.send_entity(segment)
        self.clear_trace_entities()

    def _stream_subsegment_out(self, subsegment):
        """
        Callback handed to the streaming module: send ``subsegment`` through
        the emitter if it is sampled.
        """
        log.debug("streaming subsegments...")

        if subsegment.sampled:
            self.emitter.send_entity(subsegment)

    def _load_sampling_rules(self, sampling_rules):
        """
        Load local sampling rules into the sampler.

        :param sampling_rules: a dict of rules, or a path to a JSON file
            containing them. Falsy values are ignored.
        """
        if not sampling_rules:
            return

        if isinstance(sampling_rules, dict):
            self.sampler.load_local_rules(sampling_rules)
        else:
            with open(sampling_rules) as f:
                self.sampler.load_local_rules(json.load(f))

    def _is_subsegment(self, entity):
        """
        Return ``True`` if ``entity`` has a ``type`` attribute equal to
        ``'subsegment'``.
        """
        return getattr(entity, 'type', None) == 'subsegment'

    @property
    def enabled(self):
        """
        Stored enabled flag; ``True`` until explicitly set.

        No method of this class reads this flag; they check
        ``global_sdk_config.sdk_enabled()`` instead.
        """
        return self._enabled

    @enabled.setter
    def enabled(self, value):
        self._enabled = value

    @property
    def sampling(self):
        """Whether the recorder makes its own sampling decisions."""
        return self._sampling

    @sampling.setter
    def sampling(self, value):
        self._sampling = value

    @property
    def sampler(self):
        """The sampler used to make sampling decisions."""
        return self._sampler

    @sampler.setter
    def sampler(self, value):
        self._sampler = value

    @property
    def service(self):
        """Default segment name used when none is given."""
        return self._service

    @service.setter
    def service(self, value):
        self._service = value

    @property
    def dynamic_naming(self):
        """The dynamic naming module, or ``None`` if not configured."""
        return self._dynamic_naming

    @dynamic_naming.setter
    def dynamic_naming(self, value):
        # A plain string is treated as a pattern and wrapped in the default
        # implementation, with the current service name as second argument.
        if isinstance(value, str):
            self._dynamic_naming = DefaultDynamicNaming(value, self.service)
        else:
            self._dynamic_naming = value

    @property
    def context(self):
        """The context storage for the active segment/subsegment."""
        return self._context

    @context.setter
    def context(self, cxt):
        self._context = cxt

    @property
    def emitter(self):
        """The emitter that sends entities to the X-Ray daemon."""
        return self._emitter

    @emitter.setter
    def emitter(self, value):
        self._emitter = value

    @property
    def streaming(self):
        """The streaming module used to stream out subsegments."""
        return self._streaming

    @streaming.setter
    def streaming(self, value):
        self._streaming = value

    @property
    def streaming_threshold(self):
        """
        Proxy method to Streaming module's `streaming_threshold` property.
        """
        return self.streaming.streaming_threshold

    @streaming_threshold.setter
    def streaming_threshold(self, value):
        """
        Proxy method to Streaming module's `streaming_threshold` property.
        """
        self.streaming.streaming_threshold = value

    @property
    def max_trace_back(self):
        """Stack trace limit used when recording exceptions."""
        return self._max_trace_back

    @max_trace_back.setter
    def max_trace_back(self, value):
        self._max_trace_back = value

    @property
    def stream_sql(self):
        """Whether SQL query texts should be streamed."""
        return self._stream_sql

    @stream_sql.setter
    def stream_sql(self, value):
        self._stream_sql = value