@@ -18,23 +18,30 @@ typedef PyObject *(*py_trampoline)(py_evaluator, PyThreadState *,
1818extern void * _Py_trampoline_func_start ;
1919extern void * _Py_trampoline_func_end ;
2020
/*
 * Bookkeeping for one memory region filled with copies of the trampoline
 * template.  Arenas are chained through `prev`, newest first, so that all
 * of them can be walked and released later.
 */
typedef struct code_arena_st {
    char *start_addr;            // base address of the mapped region
    char *current_addr;          // starts at start_addr; presumably the next
                                 // unused slot -- confirm in code_arena_new_code()
    size_t size;                 // total size of the mapped region, in bytes
    size_t size_left;            // starts equal to size; presumably the bytes
                                 // not yet handed out -- confirm
    size_t code_size;            // size in bytes of a single trampoline copy
    struct code_arena_st *prev;  // arena that was current before this one,
                                 // or NULL for the first arena
} code_arena_t;
2831
2932static Py_ssize_t extra_code_index = -1 ;
30- static code_arena_t code_arena ;
33+ static code_arena_t * code_arena ;
34+ static FILE * perf_map_file ;
3135
3236static int
33- new_code_arena ()
37+ new_code_arena (void )
3438{
35- size_t page_size = sysconf (_SC_PAGESIZE );
39+ // non-trivial programs typically need 64 to 256 kiB.
40+ size_t mem_size = 4096 * 16 ;
41+ assert (mem_size % sysconf (_SC_PAGESIZE ) == 0 );
3642 char * memory = mmap (NULL , // address
37- page_size , PROT_READ | PROT_WRITE | PROT_EXEC ,
43+ mem_size ,
44+ PROT_READ | PROT_WRITE | PROT_EXEC ,
3845 MAP_PRIVATE | MAP_ANONYMOUS ,
3946 -1 , // fd (not used here)
4047 0 ); // offset (not used here)
@@ -46,21 +53,43 @@ new_code_arena()
4653 void * end = & _Py_trampoline_func_end ;
4754 size_t code_size = end - start ;
4855
49- long n_copies = page_size / code_size ;
56+ long n_copies = mem_size / code_size ;
5057 for (int i = 0 ; i < n_copies ; i ++ ) {
5158 memcpy (memory + i * code_size , start , code_size * sizeof (char ));
5259 }
5360
54- mprotect (memory , page_size , PROT_READ | PROT_EXEC );
61+ mprotect (memory , mem_size , PROT_READ | PROT_EXEC );
62+
63+ code_arena_t * new_arena = PyMem_RawCalloc (1 , sizeof (code_arena_t ));
64+ if (new_arena == NULL ) {
65+ Py_FatalError ("Failed to allocate new code arena struct" );
66+ return -1 ;
67+ }
5568
56- code_arena .start_addr = memory ;
57- code_arena .current_addr = memory ;
58- code_arena .size = page_size ;
59- code_arena .size_left = page_size ;
60- code_arena .code_size = code_size ;
69+ new_arena -> start_addr = memory ;
70+ new_arena -> current_addr = memory ;
71+ new_arena -> size = mem_size ;
72+ new_arena -> size_left = mem_size ;
73+ new_arena -> code_size = code_size ;
74+ new_arena -> prev = code_arena ;
75+ code_arena = new_arena ;
6176 return 0 ;
6277}
6378
79+ static void
80+ free_code_arenas (void )
81+ {
82+ code_arena_t * cur = code_arena ;
83+ code_arena_t * prev ;
84+ code_arena = NULL ; // invalid static pointer
85+ while (cur ) {
86+ munmap (cur -> start_addr , cur -> size );
87+ prev = cur -> prev ;
88+ PyMem_RawFree (cur );
89+ cur = prev ;
90+ }
91+ }
92+
6493static inline py_trampoline
6594code_arena_new_code (code_arena_t * code_arena )
6695{
@@ -73,27 +102,32 @@ code_arena_new_code(code_arena_t *code_arena)
73102static inline py_trampoline
74103compile_trampoline (void )
75104{
76- if (code_arena . size_left <= code_arena . code_size ) {
105+ if (( code_arena == NULL ) || ( code_arena -> size_left <= code_arena -> code_size ) ) {
77106 if (new_code_arena () < 0 ) {
78107 return NULL ;
79108 }
80109 }
81110
82- assert (code_arena . size_left <= code_arena . size );
83- return code_arena_new_code (& code_arena );
111+ assert (code_arena -> size_left <= code_arena -> size );
112+ return code_arena_new_code (code_arena );
84113}
85114
86115static inline FILE *
87- perf_map_open ( pid_t pid )
116+ perf_map_get_file ( void )
88117{
118+ if (perf_map_file ) {
119+ return perf_map_file ;
120+ }
89121 char filename [100 ];
122+ pid_t pid = getpid ();
123+ // TODO: %d is incorrect if pid_t is long long
90124 snprintf (filename , sizeof (filename ), "/tmp/perf-%d.map" , pid );
91- FILE * res = fopen (filename , "a" );
92- if (!res ) {
125+ perf_map_file = fopen (filename , "a" );
126+ if (!perf_map_file ) {
93127 _Py_FatalErrorFormat (__func__ , "Couldn't open %s: errno(%d)" , filename , errno );
94128 return NULL ;
95129 }
96- return res ;
130+ return perf_map_file ;
97131}
98132
99133static inline int
@@ -112,6 +146,7 @@ perf_map_write_entry(FILE *method_file, const void *code_addr,
112146{
113147 fprintf (method_file , "%lx %x py::%s:%s\n" , (unsigned long )code_addr ,
114148 code_size , entry , file );
149+ fflush (method_file );
115150}
116151
117152static PyObject *
@@ -129,14 +164,13 @@ py_trampoline_evaluator(PyThreadState *ts, _PyInterpreterFrame *frame,
129164 if (new_trampoline == NULL ) {
130165 return NULL ;
131166 }
132- FILE * pfile = perf_map_open ( getpid () );
167+ FILE * pfile = perf_map_get_file ( );
133168 if (pfile == NULL ) {
134169 return NULL ;
135170 }
136- perf_map_write_entry (pfile , new_trampoline , code_arena . code_size ,
171+ perf_map_write_entry (pfile , new_trampoline , code_arena -> code_size ,
137172 PyUnicode_AsUTF8 (co -> co_qualname ),
138173 PyUnicode_AsUTF8 (co -> co_filename ));
139- perf_map_close (pfile );
140174 _PyCode_SetExtra ((PyObject * )co , extra_code_index ,
141175 (void * )new_trampoline );
142176 f = new_trampoline ;
@@ -163,3 +197,23 @@ _PyPerfTrampoline_Init(int activate)
163197 }
164198 return 0 ;
165199}
200+
/*
 * Tear down the perf trampoline state: release all code arenas and close
 * the perf map stream if one was opened.  Always returns 0.
 */
int
_PyPerfTrampoline_Fini(void)
{
#ifdef HAVE_PERF_TRAMPOLINE
    free_code_arenas();
    // The stream is only opened lazily by perf_map_get_file(), so it may
    // still be NULL here; do not hand NULL to perf_map_close().
    if (perf_map_file != NULL) {
        perf_map_close(perf_map_file);
        // Forget the stream so perf_map_get_file() reopens the file
        // instead of returning a FILE* that has already been closed.
        perf_map_file = NULL;
    }
#endif
    return 0;
}
210+
211+ PyStatus
212+ _PyPerfTrampoline_AfterFork_Child (void )
213+ {
214+ #ifdef HAVE_PERF_TRAMPOLINE
215+ // close file in child.
216+ perf_map_close (perf_map_file );
217+ #endif
218+ return PyStatus_Ok ();
219+ }
0 commit comments