5
5
*/
6
6
7
7
#define LIBCO_C
8
- #include " libco.h"
8
+ #include < libco.h>
9
9
#include <assert.h>
10
10
#include <stdlib.h>
11
11
@@ -21,69 +21,72 @@ static thread_local long long co_active_buffer[64];
21
21
static thread_local cothread_t co_active_handle = 0 ;
22
22
#ifndef CO_USE_INLINE_ASM
23
23
static void (* co_swap )(cothread_t , cothread_t ) = 0 ;
24
- #else
25
- void co_swap (cothread_t , cothread_t );
26
24
#endif
27
25
28
26
#ifdef _WIN32
29
- //ABI: Win64
27
+ /* ABI: Win64 */
28
+ /* On Windows the handle is allocated by malloc, which is guaranteed to
29
+ return memory with at least 16-byte alignment. Hence we don't need to
30
+ align it in order to use movaps. */
30
31
static unsigned char co_swap_function [] = {
31
- 0x48 , 0x89 , 0x22 , /* mov [rdx],rsp */
32
- 0x48 , 0x8b , 0x21 , /* mov rsp,[rcx] */
33
- 0x58 , /* pop rax */
34
- 0x48 , 0x89 , 0x6a , 0x08 , /* mov [rdx+0x8],rbp */
35
- 0x48 , 0x89 , 0x72 , 0x10 , /* mov [rdx+0x10],rsi */
36
- 0x48 , 0x89 , 0x7a , 0x18 , /* mov [rdx+0x18],rdi */
37
- 0x48 , 0x89 , 0x5a , 0x20 , /* mov [rdx+0x20],rbx */
38
- 0x4c , 0x89 , 0x62 , 0x28 , /* mov [rdx+0x28],r12 */
39
- 0x4c , 0x89 , 0x6a , 0x30 , /* mov [rdx+0x30],r13 */
40
- 0x4c , 0x89 , 0x72 , 0x38 , /* mov [rdx+0x38],r14 */
41
- 0x4c , 0x89 , 0x7a , 0x40 , /* mov [rdx+0x40],r15 */
42
- 0x48 , 0x81 , 0xc2 , 0x80 , 0x00 , 0x00 , 0x00 , /* add rdx,0x80 */
43
- 0x48 , 0x83 , 0xe2 , 0xf0 , /* and rdx,-0x10 */
44
- 0x0f , 0x29 , 0x32 , /* movaps [rdx],xmm6 */
45
- 0x0f , 0x29 , 0x7a , 0x10 , /* movaps [rdx+0x10],xmm7 */
46
- 0x44 , 0x0f , 0x29 , 0x42 , 0x20 , /* movaps [rdx+0x20],xmm8 */
47
- 0x44 , 0x0f , 0x29 , 0x4a , 0x30 , /* movaps [rdx+0x30],xmm9 */
48
- 0x44 , 0x0f , 0x29 , 0x52 , 0x40 , /* movaps [rdx+0x40],xmm10 */
49
- 0x44 , 0x0f , 0x29 , 0x5a , 0x50 , /* movaps [rdx+0x50],xmm11 */
50
- 0x44 , 0x0f , 0x29 , 0x62 , 0x60 , /* movaps [rdx+0x60],xmm12 */
51
- 0x44 , 0x0f , 0x29 , 0x6a , 0x70 , /* movaps [rdx+0x70],xmm13 */
52
- 0x44 , 0x0f , 0x29 , 0xb2 , 0x80 , 0x00 , 0x00 , 0x00 , /* movaps [rdx+0x80],xmm14 */
53
- 0x44 , 0x0f , 0x29 , 0xba , 0x90 , 0x00 , 0x00 , 0x00 , /* movaps [rdx+0x90],xmm15 */
54
- 0x48 , 0x8b , 0x69 , 0x08 , /* mov rbp,[rcx+0x8] */
55
- 0x48 , 0x8b , 0x71 , 0x10 , /* mov rsi,[rcx+0x10] */
56
- 0x48 , 0x8b , 0x79 , 0x18 , /* mov rdi,[rcx+0x18] */
57
- 0x48 , 0x8b , 0x59 , 0x20 , /* mov rbx,[rcx+0x20] */
58
- 0x4c , 0x8b , 0x61 , 0x28 , /* mov r12,[rcx+0x28] */
59
- 0x4c , 0x8b , 0x69 , 0x30 , /* mov r13,[rcx+0x30] */
60
- 0x4c , 0x8b , 0x71 , 0x38 , /* mov r14,[rcx+0x38] */
61
- 0x4c , 0x8b , 0x79 , 0x40 , /* mov r15,[rcx+0x40] */
62
- 0x48 , 0x81 , 0xc1 , 0x80 , 0x00 , 0x00 , 0x00 , /* add rcx,0x80 */
63
- 0x48 , 0x83 , 0xe1 , 0xf0 , /* and rcx,-0x10 */
64
- 0x0f , 0x29 , 0x31 , /* movaps [rcx],xmm6 */
65
- 0x0f , 0x29 , 0x79 , 0x10 , /* movaps [rcx+0x10],xmm7 */
66
- 0x44 , 0x0f , 0x29 , 0x41 , 0x20 , /* movaps [rcx+0x20],xmm8 */
67
- 0x44 , 0x0f , 0x29 , 0x49 , 0x30 , /* movaps [rcx+0x30],xmm9 */
68
- 0x44 , 0x0f , 0x29 , 0x51 , 0x40 , /* movaps [rcx+0x40],xmm10 */
69
- 0x44 , 0x0f , 0x29 , 0x59 , 0x50 , /* movaps [rcx+0x50],xmm11 */
70
- 0x44 , 0x0f , 0x29 , 0x61 , 0x60 , /* movaps [rcx+0x60],xmm12 */
71
- 0x44 , 0x0f , 0x29 , 0x69 , 0x70 , /* movaps [rcx+0x70],xmm13 */
72
- 0x44 , 0x0f , 0x29 , 0xb1 , 0x80 , 0x00 , 0x00 , 0x00 , /* movaps [rcx+0x80],xmm14 */
73
- 0x44 , 0x0f , 0x29 , 0xb9 , 0x90 , 0x00 , 0x00 , 0x00 , /* movaps [rcx+0x90],xmm15 */
74
- 0xff , 0xe0 , /* jmp rax */
32
+ 0x48 , 0x89 , 0x22 , /* mov [rdx],rsp */
33
+ 0x48 , 0x8b , 0x21 , /* mov rsp,[rcx] */
34
+ 0x58 , /* pop rax */
35
+ 0x48 , 0x89 , 0x6a , 0x08 , /* mov [rdx+ 8],rbp */
36
+ 0x48 , 0x89 , 0x72 , 0x10 , /* mov [rdx+16],rsi */
37
+ 0x48 , 0x89 , 0x7a , 0x18 , /* mov [rdx+24],rdi */
38
+ 0x48 , 0x89 , 0x5a , 0x20 , /* mov [rdx+32],rbx */
39
+ 0x4c , 0x89 , 0x62 , 0x28 , /* mov [rdx+40],r12 */
40
+ 0x4c , 0x89 , 0x6a , 0x30 , /* mov [rdx+48],r13 */
41
+ 0x4c , 0x89 , 0x72 , 0x38 , /* mov [rdx+56],r14 */
42
+ 0x4c , 0x89 , 0x7a , 0x40 , /* mov [rdx+64],r15 */
43
+ #if !defined (LIBCO_NO_SSE )
44
+ 0x0f , 0x29 , 0x72 , 0x50 , /* movaps [rdx+ 80],xmm6 */
45
+ 0x0f , 0x29 , 0x7a , 0x60 , /* movaps [rdx+ 96],xmm7 */
46
+ 0x44 , 0x0f , 0x29 , 0x42 , 0x70 , /* movaps [rdx+112],xmm8 */
47
+ 0x48 , 0x83 , 0xc2 , 0x70 , /* add rdx,112 */
48
+ 0x44 , 0x0f , 0x29 , 0x4a , 0x10 , /* movaps [rdx+ 16],xmm9 */
49
+ 0x44 , 0x0f , 0x29 , 0x52 , 0x20 , /* movaps [rdx+ 32],xmm10 */
50
+ 0x44 , 0x0f , 0x29 , 0x5a , 0x30 , /* movaps [rdx+ 48],xmm11 */
51
+ 0x44 , 0x0f , 0x29 , 0x62 , 0x40 , /* movaps [rdx+ 64],xmm12 */
52
+ 0x44 , 0x0f , 0x29 , 0x6a , 0x50 , /* movaps [rdx+ 80],xmm13 */
53
+ 0x44 , 0x0f , 0x29 , 0x72 , 0x60 , /* movaps [rdx+ 96],xmm14 */
54
+ 0x44 , 0x0f , 0x29 , 0x7a , 0x70 , /* movaps [rdx+112],xmm15 */
55
+ #endif
56
+ 0x48 , 0x8b , 0x69 , 0x08 , /* mov rbp,[rcx+ 8] */
57
+ 0x48 , 0x8b , 0x71 , 0x10 , /* mov rsi,[rcx+16] */
58
+ 0x48 , 0x8b , 0x79 , 0x18 , /* mov rdi,[rcx+24] */
59
+ 0x48 , 0x8b , 0x59 , 0x20 , /* mov rbx,[rcx+32] */
60
+ 0x4c , 0x8b , 0x61 , 0x28 , /* mov r12,[rcx+40] */
61
+ 0x4c , 0x8b , 0x69 , 0x30 , /* mov r13,[rcx+48] */
62
+ 0x4c , 0x8b , 0x71 , 0x38 , /* mov r14,[rcx+56] */
63
+ 0x4c , 0x8b , 0x79 , 0x40 , /* mov r15,[rcx+64] */
64
+ #if !defined (LIBCO_NO_SSE )
65
+ 0x0f , 0x28 , 0x71 , 0x50 , /* movaps xmm6, [rcx+ 80] */
66
+ 0x0f , 0x28 , 0x79 , 0x60 , /* movaps xmm7, [rcx+ 96] */
67
+ 0x44 , 0x0f , 0x28 , 0x41 , 0x70 , /* movaps xmm8, [rcx+112] */
68
+ 0x48 , 0x83 , 0xc1 , 0x70 , /* add rcx,112 */
69
+ 0x44 , 0x0f , 0x28 , 0x49 , 0x10 , /* movaps xmm9, [rcx+ 16] */
70
+ 0x44 , 0x0f , 0x28 , 0x51 , 0x20 , /* movaps xmm10,[rcx+ 32] */
71
+ 0x44 , 0x0f , 0x28 , 0x59 , 0x30 , /* movaps xmm11,[rcx+ 48] */
72
+ 0x44 , 0x0f , 0x28 , 0x61 , 0x40 , /* movaps xmm12,[rcx+ 64] */
73
+ 0x44 , 0x0f , 0x28 , 0x69 , 0x50 , /* movaps xmm13,[rcx+ 80] */
74
+ 0x44 , 0x0f , 0x28 , 0x71 , 0x60 , /* movaps xmm14,[rcx+ 96] */
75
+ 0x44 , 0x0f , 0x28 , 0x79 , 0x70 , /* movaps xmm15,[rcx+112] */
76
+ #endif
77
+ 0xff , 0xe0 , /* jmp rax */
75
78
};
76
79
77
80
#include <windows.h>
78
81
79
- void co_init (void )
82
/* Win64 setup: changes the page protection of the co_swap_function byte
   array to PAGE_EXECUTE_READWRITE so the machine code stored in it can be
   executed. The previous protection is written to old_privileges and then
   discarded. The return value of VirtualProtect is not checked. */
+ static void co_init (void )
80
83
{
81
84
DWORD old_privileges ;
82
85
VirtualProtect (co_swap_function ,
83
86
sizeof (co_swap_function ), PAGE_EXECUTE_READWRITE , & old_privileges );
84
87
}
85
88
#else
86
- // ABI: SystemV
89
+ /* ABI: SystemV */
87
90
#ifndef CO_USE_INLINE_ASM
88
91
static unsigned char co_swap_function [] = {
89
92
0x48 , 0x89 , 0x26 , /* mov [rsi],rsp */
@@ -107,37 +110,15 @@ static unsigned char co_swap_function[] = {
107
110
#include <unistd.h>
108
111
#include <sys/mman.h>
109
112
110
- void co_init (void )
113
/* SystemV setup (byte-array build): marks the memory holding
   co_swap_function as readable, writable and executable.
   addr is the array's address, base is addr rounded down to a multiple of
   the page size reported by sysconf(_SC_PAGESIZE), and size spans from base
   to the end of the array. The return value of mprotect is not checked. */
+ static void co_init (void )
111
114
{
112
115
unsigned long long addr = (unsigned long long )co_swap_function ;
113
116
unsigned long long base = addr - (addr % sysconf (_SC_PAGESIZE ));
114
117
unsigned long long size = (addr - base ) + sizeof (co_swap_function );
115
118
mprotect ((void * )base , size , PROT_READ | PROT_WRITE | PROT_EXEC );
116
119
}
117
120
#else
118
- __asm__(
119
- ".intel_syntax noprefix\n"
120
- ".globl co_swap \n"
121
- "co_swap: \n"
122
- ".globl _co_swap \n" /* OSX ABI is different from Linux. */
123
- "_co_swap: \n"
124
- "mov [rsi],rsp \n"
125
- "mov [rsi+0x08],rbp \n"
126
- "mov [rsi+0x10],rbx \n"
127
- "mov [rsi+0x18],r12 \n"
128
- "mov [rsi+0x20],r13 \n"
129
- "mov [rsi+0x28],r14 \n"
130
- "mov [rsi+0x30],r15 \n"
131
- "mov rsp,[rdi] \n"
132
- "mov rbp,[rdi+0x08] \n"
133
- "mov rbx,[rdi+0x10] \n"
134
- "mov r12,[rdi+0x18] \n"
135
- "mov r13,[rdi+0x20] \n"
136
- "mov r14,[rdi+0x28] \n"
137
- "mov r15,[rdi+0x30] \n"
138
- "ret \n"
139
- ".att_syntax \n"
140
- );
121
/* Inline-assembly build (CO_USE_INLINE_ASM defined): co_init has an empty
   body, so no runtime setup is performed here. */
+ static void co_init (void ) {}
141
122
#endif
142
123
#endif
143
124
@@ -170,27 +151,72 @@ cothread_t co_create(unsigned int size, void (*entrypoint)(void))
170
151
size += 512 ; /* allocate additional space for storage */
171
152
size &= ~15 ; /* align stack to 16-byte boundary */
172
153
154
+ #ifdef __GENODE__
155
+ if ((handle = (cothread_t )genode_alloc_secondary_stack (size )))
156
+ {
157
+ long long * p = (long long * )((char * )handle ); /* OS returns top of stack */
158
+ * -- p = (long long )crash ; /* crash if entrypoint returns */
159
+ * -- p = (long long )entrypoint ; /* start of function */
160
+ * (long long * )handle = (long long )p ; /* stack pointer */
161
+ }
162
+ #else
173
163
if ((handle = (cothread_t )malloc (size )))
174
164
{
175
165
long long * p = (long long * )((char * )handle + size ); /* seek to top of stack */
176
166
* -- p = (long long )crash ; /* crash if entrypoint returns */
177
167
* -- p = (long long )entrypoint ; /* start of function */
178
168
* (long long * )handle = (long long )p ; /* stack pointer */
179
169
}
170
+ #endif
180
171
181
172
return handle ;
182
173
}
183
174
184
175
/* Releases the storage behind a cothread handle.
   With __GENODE__ defined the handle is passed to
   genode_free_secondary_stack(); otherwise it is passed to free(). */
void co_delete (cothread_t handle )
185
176
{
177
+ #ifdef __GENODE__
178
+ genode_free_secondary_stack (handle );
179
+ #else
186
180
free (handle );
181
+ #endif
187
182
}
188
183
184
+ #ifndef CO_USE_INLINE_ASM
189
185
/* C version of co_switch, used when CO_USE_INLINE_ASM is not defined.
   Remembers the currently active handle, records `handle` as the new
   co_active_handle, then calls co_swap(new handle, previous handle).
   NOTE(review): co_swap is a function pointer initialised to 0 in this
   file; where it is assigned is not visible here - confirm it is set before
   the first call. */
void co_switch (cothread_t handle )
190
186
{
191
187
register cothread_t co_previous_handle = co_active_handle ;
192
188
co_swap (co_active_handle = handle , co_previous_handle );
193
189
}
190
+ #else
191
/* Inline-assembly version of co_switch, used when CO_USE_INLINE_ASM is
   defined. ASM_PREFIX is "_" on __APPLE__ and "" otherwise, and is prepended
   to the co_switch and co_active_handle symbol names.
   The routine loads co_active_handle into rsi and stores rsp, rbp, rbx and
   r12-r15 at offsets 0x00-0x30 from it. It then writes rdi into
   co_active_handle, reloads the same registers from the same offsets
   relative to rdi, and executes ret on the newly loaded stack.
   Presumably rdi holds the `handle` argument per the System V AMD64 calling
   convention - confirm this path is never built for Win64.
   NOTE(review): co_active_handle is declared `static thread_local` in this
   file; the RIP-relative accesses below are only valid if thread_local
   expands to nothing in this configuration - confirm. */
+ #ifdef __APPLE__
192
+ #define ASM_PREFIX "_"
193
+ #else
194
+ #define ASM_PREFIX ""
195
+ #endif
196
+ __asm__(
197
+ ".intel_syntax noprefix \n"
198
+ ".globl " ASM_PREFIX "co_switch \n"
199
+ ASM_PREFIX "co_switch: \n"
200
+ "mov rsi, [rip+" ASM_PREFIX "co_active_handle]\n"
201
+ "mov [rsi],rsp \n"
202
+ "mov [rsi+0x08],rbp \n"
203
+ "mov [rsi+0x10],rbx \n"
204
+ "mov [rsi+0x18],r12 \n"
205
+ "mov [rsi+0x20],r13 \n"
206
+ "mov [rsi+0x28],r14 \n"
207
+ "mov [rsi+0x30],r15 \n"
208
+ "mov [rip+" ASM_PREFIX "co_active_handle], rdi\n"
209
+ "mov rsp,[rdi] \n"
210
+ "mov rbp,[rdi+0x08] \n"
211
+ "mov rbx,[rdi+0x10] \n"
212
+ "mov r12,[rdi+0x18] \n"
213
+ "mov r13,[rdi+0x20] \n"
214
+ "mov r14,[rdi+0x28] \n"
215
+ "mov r15,[rdi+0x30] \n"
216
+ "ret \n"
217
+ ".att_syntax \n"
218
+ );
219
+ #endif
194
220
195
221
#ifdef __cplusplus
196
222
}
0 commit comments