vmp_stack.c
#include "vmp_stack.h"
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <stddef.h>
#include <assert.h>
#include "vmprof.h"
#include "compat.h"
#ifdef VMP_SUPPORTS_NATIVE_PROFILING
#if defined(VMPROF_LINUX) || defined(VMPROF_BSD)
#include "unwind/vmprof_unwind.h"
typedef mcontext_t unw_context_t;
// function pointers resolved from libunwind at runtime via dlopen/dlsym
static int (*unw_get_reg)(unw_cursor_t*, int, unw_word_t*) = NULL;
static int (*unw_step)(unw_cursor_t*) = NULL;
static int (*unw_init_local)(unw_cursor_t *, unw_context_t *) = NULL;
static int (*unw_get_proc_info)(unw_cursor_t *, unw_proc_info_t *) = NULL;
static int (*unw_get_proc_name)(unw_cursor_t *, char *, size_t, unw_word_t*) = NULL;
static int (*unw_is_signal_frame)(unw_cursor_t *) = NULL;
static int (*unw_getcontext)(unw_context_t *) = NULL;
#else
#define UNW_LOCAL_ONLY
#include <libunwind.h>
#endif
#endif
#ifdef __APPLE__
#include <mach/mach.h>
#include <mach/mach_vm.h>
#include <mach/message.h>
#include <mach/kern_return.h>
#include <mach/task_info.h>
#include <sys/types.h>
#include <unistd.h>
#include <dlfcn.h>
#elif defined(__unix__)
#include <dlfcn.h>
#endif
int _per_loop(void) {
// how many void* entries are written to the stack trace per frame (i.e. per loop iteration)?
#ifdef RPYTHON_VMPROF
return 2;
#else
if (vmp_profiles_python_lines()) {
return 2;
}
return 1;
#endif
}
#ifdef PY_TEST
// for testing only!
PY_EVAL_RETURN_T * vmprof_eval(PY_STACK_FRAME_T *f, int throwflag) { return NULL; }
#endif
#ifdef VMP_SUPPORTS_NATIVE_PROFILING
static intptr_t *vmp_ranges = NULL;
static ssize_t vmp_range_count = 0;
static int vmp_native_traces_enabled = 0;
#endif
static int _vmp_profiles_lines = 0;
void vmp_profile_lines(int lines) {
_vmp_profiles_lines = lines;
}
int vmp_profiles_python_lines(void) {
return _vmp_profiles_lines;
}
static PY_STACK_FRAME_T * _write_python_stack_entry(PY_STACK_FRAME_T * frame, void ** result, int * depth, int max_depth)
{
int len;
int addr;
int j;
uint64_t line;
char *lnotab;
#ifndef RPYTHON_VMPROF // pypy does not support line profiling
if (vmp_profiles_python_lines()) {
// In line-profiling mode we save a line number for every frame.
// The actual line number isn't stored in the frame directly (f_lineno
// refers to the beginning of the frame), so we need to compute it
// from f_lasti and f_code->co_lnotab. The co_lnotab format is
// explained here:
// https://svn.python.org/projects/python/trunk/Objects/lnotab_notes.txt
// NOTE: the profiling overhead could be reduced by storing co_lnotab in the dump
// and moving this computation to the reader instead of doing it here.
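// Worked example with hypothetical values: for co_lnotab = {6, 1, 8, 2},
// f_lasti = 10 and f_lineno = 20, the loop below first adds 6 to addr
// (line becomes 21), then addr reaches 14 > 10 and the loop stops, so
// line 21 is recorded for this frame.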
lnotab = PyStr_AS_STRING(frame->f_code->co_lnotab);
if (lnotab != NULL) {
line = (uint64_t)frame->f_lineno;
addr = 0;
len = (int)PyStr_GET_SIZE(frame->f_code->co_lnotab);
for (j = 0; j < len; j += 2) {
addr += lnotab[j];
if (addr > frame->f_lasti) {
break;
}
line += lnotab[j+1];
}
result[*depth] = (void*) line;
*depth = *depth + 1;
} else {
result[*depth] = (void*) 0;
*depth = *depth + 1;
}
}
result[*depth] = (void*)CODE_ADDR_TO_UID(FRAME_CODE(frame));
*depth = *depth + 1;
#else
if (frame->kind == VMPROF_CODE_TAG) {
int n = *depth;
result[n++] = (void*)frame->kind;
result[n++] = (void*)frame->value;
*depth = n;
}
#ifdef PYPY_JIT_CODEMAP
else if (frame->kind == VMPROF_JITTED_TAG) {
intptr_t pc = ((intptr_t*)(frame->value - sizeof(intptr_t)))[0];
*depth = vmprof_write_header_for_jit_addr(result, *depth, pc, max_depth);
}
#endif
#endif
return FRAME_STEP(frame);
}
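// Walks the chain of python frames and appends one entry per frame
// (two in line-profiling mode, see _per_loop) to result, starting at
// the given depth and never exceeding max_depth. Returns the new depth.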
int vmp_walk_and_record_python_stack_only(PY_STACK_FRAME_T *frame, void ** result,
int max_depth, int depth, intptr_t pc)
{
while ((depth + _per_loop()) <= max_depth && frame) {
frame = _write_python_stack_entry(frame, result, &depth, max_depth);
}
return depth;
}
#ifdef VMP_SUPPORTS_NATIVE_PROFILING
int _write_native_stack(void* addr, void ** result, int depth, int max_depth) {
#ifdef RPYTHON_VMPROF
if (depth + 2 >= max_depth) {
// bail, do not write to unknown memory
return depth;
}
result[depth++] = (void*)VMPROF_NATIVE_TAG;
#else
if (vmp_profiles_python_lines()) {
if (depth + 2 >= max_depth) {
// bail, do not write to unknown memory
return depth;
}
// even if we do not log a python line number,
// we must keep the profile readable
result[depth++] = 0;
}
#endif
result[depth++] = addr;
return depth;
}
#endif
int vmp_walk_and_record_stack(PY_STACK_FRAME_T *frame, void ** result,
int max_depth, int signal, intptr_t pc) {
// called in signal handler
//
// This function records the stack trace for a python program. It also
// tracks native function calls if libunwind can be found on the system.
//
// The idea is the following (in the native case):
//
// 1) Remove frames until the signal frame is found (skipping it as well)
// 2) if the current frame corresponds to PyEval_EvalFrameEx (or the equivalent
// for the running python version), then jump to 4)
// 3) jump to 2)
// 4) walk each python frame and record it
//
//
// There are several cases that need to be taken care of.
//
// CPython supports line profiling, PyPy does not. At the same time,
// PyPy saves an address in the slot where CPython would store the
// line information. See _write_python_stack_entry for details.
//
#ifdef VMP_SUPPORTS_NATIVE_PROFILING
void * func_addr;
unw_cursor_t cursor;
unw_context_t uc;
unw_proc_info_t pip;
int ret;
if (vmp_native_enabled() == 0) {
return vmp_walk_and_record_python_stack_only(frame, result, max_depth, 0, pc);
}
ret = unw_getcontext(&uc);
if (ret < 0) {
// could not retrieve the machine context via libunwind
fprintf(stderr, "WARNING: unw_getcontext did not retrieve context, switching to python profiling mode\n");
vmp_native_disable();
return vmp_walk_and_record_python_stack_only(frame, result, max_depth, 0, pc);
}
ret = unw_init_local(&cursor, &uc);
if (ret < 0) {
// could not initialize the libunwind cursor
fprintf(stderr, "WARNING: unw_init_local did not succeed, switching to python profiling mode\n");
vmp_native_disable();
return vmp_walk_and_record_python_stack_only(frame, result, max_depth, 0, pc);
}
if (signal < 0) {
while (signal < 0) {
int err = unw_step(&cursor);
if (err <= 0) {
fprintf(stderr, "WARNING: did not find signal frame, skipping sample\n");
return 0;
}
signal++;
}
} else {
#ifdef VMPROF_LINUX
while (signal) {
int is_signal_frame = unw_is_signal_frame(&cursor);
if (is_signal_frame) {
unw_step(&cursor); // step once more to discard the signal frame
break;
}
int err = unw_step(&cursor);
if (err <= 0) {
fprintf(stderr,"WARNING: did not find signal frame, skipping sample\n");
return 0;
}
}
#else
// unw_is_signal_frame does not work on mac os x, so instead skip the
// known number of frames down to the signal trampoline:
if (signal) {
unw_step(&cursor); // vmp_walk_and_record_stack
// get_stack_trace is inlined
unw_step(&cursor); // _vmprof_sample_stack
unw_step(&cursor); // sigprof_handler
unw_step(&cursor); // _sigtramp
}
#endif
}
int depth = 0;
//PY_STACK_FRAME_T * top_most_frame = frame;
while ((depth + _per_loop()) <= max_depth) {
unw_get_proc_info(&cursor, &pip);
func_addr = (void*)pip.start_ip;
//{
// char name[64];
// unw_word_t x;
// unw_get_proc_name(&cursor, name, 64, &x);
// printf(" %s %p\n", name, func_addr);
//}
//if (func_addr == 0) {
// unw_word_t rip = 0;
// if (unw_get_reg(&cursor, UNW_REG_IP, &rip) < 0) {
// printf("failed failed failed\n");
// }
// func_addr = rip;
// printf("func_addr is 0, now %p\n", rip);
//}
#ifdef PYPY_JIT_CODEMAP
long start_addr = 0;
unw_word_t rip = 0;
if (unw_get_reg(&cursor, UNW_REG_IP, &rip) < 0) {
return 0;
}
#endif
if (IS_VMPROF_EVAL((void*)pip.start_ip)) {
// found the native frame that evaluates the python frames
return vmp_walk_and_record_python_stack_only(frame, result, max_depth, depth, pc);
#ifdef PYPY_JIT_CODEMAP
} else if (pypy_find_codemap_at_addr(rip, &start_addr) != NULL) {
depth = vmprof_write_header_for_jit_addr(result, depth, pc, max_depth);
return vmp_walk_and_record_python_stack_only(frame, result, max_depth, depth, pc);
#endif
} else {
// mark native routines by setting the lowest address bit;
// this is possible because compilers align functions to 8 bytes,
// so that bit is never set in a real address.
if (func_addr != 0x0) {
depth = _write_native_stack((void*)(((uint64_t)func_addr) | 0x1), result, depth, max_depth);
}
}
int err = unw_step(&cursor);
if (err == 0) {
break;
} else if (err < 0) {
// this sample is broken, cannot walk native level... record python level (at least)
return vmp_walk_and_record_python_stack_only(frame, result, max_depth, 0, pc);
}
}
// if we get here, the native frames collected above are discarded
// (depth restarts at 0) and only the python stack is recorded
#endif
return vmp_walk_and_record_python_stack_only(frame, result, max_depth, 0, pc);
}
int vmp_native_enabled(void) {
#ifdef VMP_SUPPORTS_NATIVE_PROFILING
return vmp_native_traces_enabled;
#else
return 0;
#endif
}
#ifdef VMP_SUPPORTS_NATIVE_PROFILING
int _ignore_symbols_from_path(const char * name) {
// which symbols should not be considered while walking
// the native stack?
#ifdef RPYTHON_VMPROF
if (strstr(name, "libpypy-c.so") != NULL
|| strstr(name, "pypy-c") != NULL) {
return 1;
}
#else
// cpython
if (strstr(name, "python") != NULL &&
# ifdef __unix__
strstr(name, ".so\n") == NULL
# elif defined(__APPLE__)
strstr(name, ".so") == NULL
# endif
) {
return 1;
}
#endif
return 0;
}
int _reset_vmp_ranges(void) {
// initially room for 10 intptr_t values, i.e. 5 (start, stop) pairs
int max_count = 10;
vmp_range_count = 0;
if (vmp_ranges != NULL) { free(vmp_ranges); }
vmp_ranges = malloc(max_count * sizeof(intptr_t));
return max_count;
}
int _resize_ranges(intptr_t ** cursor, int max_count) {
ptrdiff_t diff = (*cursor - vmp_ranges);
if (diff + 2 > max_count) {
max_count *= 2;
vmp_ranges = realloc(vmp_ranges, max_count*sizeof(intptr_t));
*cursor = vmp_ranges + diff;
}
return max_count;
}
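// vmp_ranges holds a flat, ascending sequence of (start, end) pairs.
// If the new range starts exactly where the previously written range
// ended, that range is extended instead of appending a new pair.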
intptr_t * _add_to_range(intptr_t * cursor, intptr_t start, intptr_t end) {
if (cursor[0] == start) {
// the last range is extended, this reduces the entry count
// which makes the querying faster
cursor[0] = end;
} else {
if (cursor != vmp_ranges) {
// not pointing to the first entry
cursor++;
}
cursor[0] = start;
cursor[1] = end;
vmp_range_count += 2;
cursor++;
}
return cursor;
}
#ifdef __unix__
int vmp_read_vmaps(const char * fname) {
FILE * fd = fopen(fname, "rb");
if (fd == NULL) {
return 0;
}
char * saveptr = NULL;
char * line = NULL;
char * he = NULL;
char * name;
char *start_hex = NULL, *end_hex = NULL;
size_t n = 0;
ssize_t size;
intptr_t start, end;
// assumptions to be verified:
// 1) /proc/self/maps is ordered ascending by start address
// 2) libraries that contain the name 'python' are considered
// candidates in the mapping to be ignored
// 3) libraries containing site-packages are not considered
// candidates
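// A typical (made-up) /proc/self/maps line looks like:
// 7f2d4c000000-7f2d4c021000 r-xp 00000000 08:01 123456  /usr/lib/libpython3.8.so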
int max_count = _reset_vmp_ranges();
intptr_t * cursor = vmp_ranges;
cursor[0] = -1;
while ((size = getline(&line, &n, fd)) >= 0) {
assert(line != NULL);
start_hex = strtok_r(line, "-", &saveptr);
if (start_hex == NULL) { continue; }
start = strtoll(start_hex, &he, 16);
end_hex = strtok_r(NULL, " ", &saveptr);
if (end_hex == NULL) { continue; }
end = strtoll(end_hex, &he, 16);
// skip over flags, ...
strtok_r(NULL, " ", &saveptr);
strtok_r(NULL, " ", &saveptr);
strtok_r(NULL, " ", &saveptr);
strtok_r(NULL, " ", &saveptr);
name = saveptr;
if (_ignore_symbols_from_path(name)) {
max_count = _resize_ranges(&cursor, max_count);
cursor = _add_to_range(cursor, start, end);
}
free(line);
line = NULL;
n = 0;
}
fclose(fd);
return 1;
}
#endif
#ifdef __APPLE__
int vmp_read_vmaps(const char * fname) {
kern_return_t kr;
task_t task;
mach_vm_address_t addr;
mach_vm_size_t vmsize;
vm_region_top_info_data_t topinfo;
mach_msg_type_number_t count;
memory_object_name_t obj;
int ret = 0;
pid_t pid;
pid = getpid();
kr = task_for_pid(mach_task_self(), pid, &task);
if (kr != KERN_SUCCESS) {
goto teardown;
}
addr = 0;
int max_count = _reset_vmp_ranges();
intptr_t * cursor = vmp_ranges;
cursor[0] = -1;
do {
// extract the top info using vm_region
count = VM_REGION_TOP_INFO_COUNT;
vmsize = 0;
kr = mach_vm_region(task, &addr, &vmsize, VM_REGION_TOP_INFO,
(vm_region_info_t)&topinfo, &count, &obj);
if (kr == KERN_SUCCESS) {
vm_address_t start = (vm_address_t)addr, end = (vm_address_t)(addr + vmsize);
// dladdr now gives the path of the shared object
Dl_info info;
if (dladdr((const void*)start, &info) == 0) {
// could not find image containing start
addr += vmsize;
continue;
}
if (_ignore_symbols_from_path(info.dli_fname)) {
// realloc if the chunk is too small
max_count = _resize_ranges(&cursor, max_count);
cursor = _add_to_range(cursor, start, end);
}
addr = addr + vmsize;
} else if (kr != KERN_INVALID_ADDRESS) {
goto teardown;
}
} while (kr == KERN_SUCCESS);
ret = 1;
teardown:
if (task != MACH_PORT_NULL) {
mach_port_deallocate(mach_task_self(), task);
}
return ret;
}
#endif
static const char * vmprof_error = NULL;
static void * libhandle = NULL;
#ifdef VMPROF_LINUX
#include <link.h>
#define LIBUNWIND "libunwind.so"
#ifdef __i386__
#define PREFIX "x86"
#define LIBUNWIND_SUFFIX ""
#elif __x86_64__
#define PREFIX "x86_64"
#define LIBUNWIND_SUFFIX "-x86_64"
#endif
#define U_PREFIX "_U"
#define UL_PREFIX "_UL"
#endif
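// The libunwind local API is resolved by symbol name below; on x86_64,
// for example, unw_step resolves to _ULx86_64_step and unw_getcontext
// to _Ux86_64_getcontext.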
int vmp_native_enable(void) {
#ifdef VMPROF_LINUX
void * oldhandle = NULL;
struct link_map * map = NULL;
if (libhandle == NULL) {
// on linux, the wheel includes the libunwind shared object.
libhandle = dlopen(NULL, RTLD_NOW);
if (libhandle != NULL) {
// load the link map, it will contain an entry to
// .libs_vmprof/libunwind-...so, this is the file that is
// distributed with the wheel.
if (dlinfo(libhandle, RTLD_DI_LINKMAP, &map) != 0) {
(void)dlclose(libhandle);
libhandle = NULL;
goto bail_out;
}
// grab the new handle
do {
if (strstr(map->l_name, ".libs_vmprof/libunwind" LIBUNWIND_SUFFIX) != NULL) {
oldhandle = libhandle;
libhandle = dlopen(map->l_name, RTLD_LAZY|RTLD_LOCAL);
(void)dlclose(oldhandle);
oldhandle = NULL;
goto loaded_libunwind;
}
map = map->l_next;
} while (map != NULL);
// did not find .libs_vmprof/libunwind...
(void)dlclose(libhandle);
libhandle = NULL;
}
// fallback! try to load the system's libunwind.so
if ((libhandle = dlopen(LIBUNWIND, RTLD_LAZY | RTLD_LOCAL)) == NULL) {
goto bail_out;
}
loaded_libunwind:
if ((unw_get_reg = dlsym(libhandle, UL_PREFIX PREFIX "_get_reg")) == NULL) {
goto bail_out;
}
if ((unw_get_proc_info = dlsym(libhandle, UL_PREFIX PREFIX "_get_proc_info")) == NULL){
goto bail_out;
}
if ((unw_get_proc_name = dlsym(libhandle, UL_PREFIX PREFIX "_get_proc_name")) == NULL){
goto bail_out;
}
if ((unw_init_local = dlsym(libhandle, UL_PREFIX PREFIX "_init_local")) == NULL) {
goto bail_out;
}
if ((unw_step = dlsym(libhandle, UL_PREFIX PREFIX "_step")) == NULL) {
goto bail_out;
}
if ((unw_is_signal_frame = dlsym(libhandle, UL_PREFIX PREFIX "_is_signal_frame")) == NULL) {
goto bail_out;
}
if ((unw_getcontext = dlsym(libhandle, U_PREFIX PREFIX "_getcontext")) == NULL) {
goto bail_out;
}
}
#endif
vmp_native_traces_enabled = 1;
return 1;
#ifdef VMPROF_LINUX
bail_out:
vmprof_error = dlerror();
fprintf(stderr, "could not load libunwind at runtime. error: %s\n", vmprof_error);
vmp_native_traces_enabled = 0;
return 0;
#endif
}
void vmp_native_disable(void) {
if (libhandle != NULL) {
if (dlclose(libhandle)) {
vmprof_error = dlerror();
fprintf(stderr, "could not close libunwind at runtime. error: %s\n", vmprof_error);
}
libhandle = NULL;
}
vmp_native_traces_enabled = 0;
if (vmp_ranges != NULL) {
free(vmp_ranges);
vmp_ranges = NULL;
}
vmp_range_count = 0;
}
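// returns 1 if ip falls inside one of the ignored (start, end) ranges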
int vmp_ignore_ip(intptr_t ip) {
if (vmp_range_count == 0) {
return 0;
}
int i = vmp_binary_search_ranges(ip, vmp_ranges, (int)vmp_range_count);
if (i == -1) {
return 0;
}
assert((i & 1) == 0 && "returned index MUST be even");
intptr_t v = vmp_ranges[i];
intptr_t v2 = vmp_ranges[i+1];
return v <= ip && ip <= v2;
}
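// Returns the index (always even) of the candidate (start, end) pair
// that may contain ip, or -1 if ip lies before the first start or after
// the last end. The caller must still check that start <= ip <= end.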
int vmp_binary_search_ranges(intptr_t ip, intptr_t * l, int count) {
intptr_t * r = l + count;
intptr_t * ol = l;
intptr_t * or = r-1;
while (1) {
ptrdiff_t i = (r-l)/2;
if (i == 0) {
if (l == ol && *l > ip) {
// at the start
return -1;
} else if (l == or && *l < ip) {
// at the end
return -1;
} else {
// we found the lower bound
i = l - ol;
if ((i & 1) == 1) {
return (int)i-1;
}
return (int)i;
}
}
intptr_t * m = l + i;
if (ip < *m) {
r = m;
} else {
l = m;
}
}
return -1;
}
int vmp_ignore_symbol_count(void) {
return (int)vmp_range_count;
}
intptr_t * vmp_ignore_symbols(void) {
return vmp_ranges;
}
void vmp_set_ignore_symbols(intptr_t * symbols, int count) {
vmp_ranges = symbols;
vmp_range_count = count;
}
#endif