Skip to content

Commit a8f232a

Browse files
committed
Update libco
1 parent cffc626 commit a8f232a

19 files changed

+826
-145
lines changed

Makefile.common

+3-2
Original file line number | Diff line number | Diff line change
@@ -5,8 +5,9 @@ GB_DIR := $(CORE_DIR)/gb
55
SNES_DIR := $(CORE_DIR)/sfc
66

77
INCFLAGS := \
8-
-I$(CORE_DIR) \
9-
-I$(SNES_DIR)
8+
-I$(CORE_DIR) \
9+
-I$(CORE_DIR)/libco \
10+
-I$(SNES_DIR)
1011

1112
SOURCES_C += $(CORE_DIR)/libco/libco.c
1213

libco/aarch64.c

-1
Original file line number | Diff line number | Diff line change
@@ -126,4 +126,3 @@ void co_switch(cothread_t handle)
126126
#ifdef __cplusplus
127127
}
128128
#endif
129-

libco/amd64.c

+100-74
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@
55
*/
66

77
#define LIBCO_C
8-
#include "libco.h"
8+
#include <libco.h>
99
#include <assert.h>
1010
#include <stdlib.h>
1111

@@ -21,69 +21,72 @@ static thread_local long long co_active_buffer[64];
2121
static thread_local cothread_t co_active_handle = 0;
2222
#ifndef CO_USE_INLINE_ASM
2323
static void (*co_swap)(cothread_t, cothread_t) = 0;
24-
#else
25-
void co_swap(cothread_t, cothread_t);
2624
#endif
2725

2826
#ifdef _WIN32
29-
//ABI: Win64
27+
/* ABI: Win64 */
28+
/* On Windows the handle is allocated by malloc, which is guaranteed there to
29+
return memory with at least 16-byte alignment. Hence we don't need to
30+
align it in order to use movaps. */
3031
static unsigned char co_swap_function[] = {
31-
0x48, 0x89, 0x22, /* mov [rdx],rsp */
32-
0x48, 0x8b, 0x21, /* mov rsp,[rcx] */
33-
0x58, /* pop rax */
34-
0x48, 0x89, 0x6a, 0x08, /* mov [rdx+0x8],rbp */
35-
0x48, 0x89, 0x72, 0x10, /* mov [rdx+0x10],rsi */
36-
0x48, 0x89, 0x7a, 0x18, /* mov [rdx+0x18],rdi */
37-
0x48, 0x89, 0x5a, 0x20, /* mov [rdx+0x20],rbx */
38-
0x4c, 0x89, 0x62, 0x28, /* mov [rdx+0x28],r12 */
39-
0x4c, 0x89, 0x6a, 0x30, /* mov [rdx+0x30],r13 */
40-
0x4c, 0x89, 0x72, 0x38, /* mov [rdx+0x38],r14 */
41-
0x4c, 0x89, 0x7a, 0x40, /* mov [rdx+0x40],r15 */
42-
0x48, 0x81, 0xc2, 0x80, 0x00, 0x00, 0x00, /* add rdx,0x80 */
43-
0x48, 0x83, 0xe2, 0xf0, /* and rdx,-0x10 */
44-
0x0f, 0x29, 0x32, /* movaps [rdx],xmm6 */
45-
0x0f, 0x29, 0x7a, 0x10, /* movaps [rdx+0x10],xmm7 */
46-
0x44, 0x0f, 0x29, 0x42, 0x20, /* movaps [rdx+0x20],xmm8 */
47-
0x44, 0x0f, 0x29, 0x4a, 0x30, /* movaps [rdx+0x30],xmm9 */
48-
0x44, 0x0f, 0x29, 0x52, 0x40, /* movaps [rdx+0x40],xmm10 */
49-
0x44, 0x0f, 0x29, 0x5a, 0x50, /* movaps [rdx+0x50],xmm11 */
50-
0x44, 0x0f, 0x29, 0x62, 0x60, /* movaps [rdx+0x60],xmm12 */
51-
0x44, 0x0f, 0x29, 0x6a, 0x70, /* movaps [rdx+0x70],xmm13 */
52-
0x44, 0x0f, 0x29, 0xb2, 0x80, 0x00, 0x00, 0x00, /* movaps [rdx+0x80],xmm14 */
53-
0x44, 0x0f, 0x29, 0xba, 0x90, 0x00, 0x00, 0x00, /* movaps [rdx+0x90],xmm15 */
54-
0x48, 0x8b, 0x69, 0x08, /* mov rbp,[rcx+0x8] */
55-
0x48, 0x8b, 0x71, 0x10, /* mov rsi,[rcx+0x10] */
56-
0x48, 0x8b, 0x79, 0x18, /* mov rdi,[rcx+0x18] */
57-
0x48, 0x8b, 0x59, 0x20, /* mov rbx,[rcx+0x20] */
58-
0x4c, 0x8b, 0x61, 0x28, /* mov r12,[rcx+0x28] */
59-
0x4c, 0x8b, 0x69, 0x30, /* mov r13,[rcx+0x30] */
60-
0x4c, 0x8b, 0x71, 0x38, /* mov r14,[rcx+0x38] */
61-
0x4c, 0x8b, 0x79, 0x40, /* mov r15,[rcx+0x40] */
62-
0x48, 0x81, 0xc1, 0x80, 0x00, 0x00, 0x00, /* add rcx,0x80 */
63-
0x48, 0x83, 0xe1, 0xf0, /* and rcx,-0x10 */
64-
0x0f, 0x29, 0x31, /* movaps [rcx],xmm6 */
65-
0x0f, 0x29, 0x79, 0x10, /* movaps [rcx+0x10],xmm7 */
66-
0x44, 0x0f, 0x29, 0x41, 0x20, /* movaps [rcx+0x20],xmm8 */
67-
0x44, 0x0f, 0x29, 0x49, 0x30, /* movaps [rcx+0x30],xmm9 */
68-
0x44, 0x0f, 0x29, 0x51, 0x40, /* movaps [rcx+0x40],xmm10 */
69-
0x44, 0x0f, 0x29, 0x59, 0x50, /* movaps [rcx+0x50],xmm11 */
70-
0x44, 0x0f, 0x29, 0x61, 0x60, /* movaps [rcx+0x60],xmm12 */
71-
0x44, 0x0f, 0x29, 0x69, 0x70, /* movaps [rcx+0x70],xmm13 */
72-
0x44, 0x0f, 0x29, 0xb1, 0x80, 0x00, 0x00, 0x00, /* movaps [rcx+0x80],xmm14 */
73-
0x44, 0x0f, 0x29, 0xb9, 0x90, 0x00, 0x00, 0x00, /* movaps [rcx+0x90],xmm15 */
74-
0xff, 0xe0, /* jmp rax */
32+
0x48, 0x89, 0x22, /* mov [rdx],rsp */
33+
0x48, 0x8b, 0x21, /* mov rsp,[rcx] */
34+
0x58, /* pop rax */
35+
0x48, 0x89, 0x6a, 0x08, /* mov [rdx+ 8],rbp */
36+
0x48, 0x89, 0x72, 0x10, /* mov [rdx+16],rsi */
37+
0x48, 0x89, 0x7a, 0x18, /* mov [rdx+24],rdi */
38+
0x48, 0x89, 0x5a, 0x20, /* mov [rdx+32],rbx */
39+
0x4c, 0x89, 0x62, 0x28, /* mov [rdx+40],r12 */
40+
0x4c, 0x89, 0x6a, 0x30, /* mov [rdx+48],r13 */
41+
0x4c, 0x89, 0x72, 0x38, /* mov [rdx+56],r14 */
42+
0x4c, 0x89, 0x7a, 0x40, /* mov [rdx+64],r15 */
43+
#if !defined(LIBCO_NO_SSE)
44+
0x0f, 0x29, 0x72, 0x50, /* movaps [rdx+ 80],xmm6 */
45+
0x0f, 0x29, 0x7a, 0x60, /* movaps [rdx+ 96],xmm7 */
46+
0x44, 0x0f, 0x29, 0x42, 0x70, /* movaps [rdx+112],xmm8 */
47+
0x48, 0x83, 0xc2, 0x70, /* add rdx,112 */
48+
0x44, 0x0f, 0x29, 0x4a, 0x10, /* movaps [rdx+ 16],xmm9 */
49+
0x44, 0x0f, 0x29, 0x52, 0x20, /* movaps [rdx+ 32],xmm10 */
50+
0x44, 0x0f, 0x29, 0x5a, 0x30, /* movaps [rdx+ 48],xmm11 */
51+
0x44, 0x0f, 0x29, 0x62, 0x40, /* movaps [rdx+ 64],xmm12 */
52+
0x44, 0x0f, 0x29, 0x6a, 0x50, /* movaps [rdx+ 80],xmm13 */
53+
0x44, 0x0f, 0x29, 0x72, 0x60, /* movaps [rdx+ 96],xmm14 */
54+
0x44, 0x0f, 0x29, 0x7a, 0x70, /* movaps [rdx+112],xmm15 */
55+
#endif
56+
0x48, 0x8b, 0x69, 0x08, /* mov rbp,[rcx+ 8] */
57+
0x48, 0x8b, 0x71, 0x10, /* mov rsi,[rcx+16] */
58+
0x48, 0x8b, 0x79, 0x18, /* mov rdi,[rcx+24] */
59+
0x48, 0x8b, 0x59, 0x20, /* mov rbx,[rcx+32] */
60+
0x4c, 0x8b, 0x61, 0x28, /* mov r12,[rcx+40] */
61+
0x4c, 0x8b, 0x69, 0x30, /* mov r13,[rcx+48] */
62+
0x4c, 0x8b, 0x71, 0x38, /* mov r14,[rcx+56] */
63+
0x4c, 0x8b, 0x79, 0x40, /* mov r15,[rcx+64] */
64+
#if !defined(LIBCO_NO_SSE)
65+
0x0f, 0x28, 0x71, 0x50, /* movaps xmm6, [rcx+ 80] */
66+
0x0f, 0x28, 0x79, 0x60, /* movaps xmm7, [rcx+ 96] */
67+
0x44, 0x0f, 0x28, 0x41, 0x70, /* movaps xmm8, [rcx+112] */
68+
0x48, 0x83, 0xc1, 0x70, /* add rcx,112 */
69+
0x44, 0x0f, 0x28, 0x49, 0x10, /* movaps xmm9, [rcx+ 16] */
70+
0x44, 0x0f, 0x28, 0x51, 0x20, /* movaps xmm10,[rcx+ 32] */
71+
0x44, 0x0f, 0x28, 0x59, 0x30, /* movaps xmm11,[rcx+ 48] */
72+
0x44, 0x0f, 0x28, 0x61, 0x40, /* movaps xmm12,[rcx+ 64] */
73+
0x44, 0x0f, 0x28, 0x69, 0x50, /* movaps xmm13,[rcx+ 80] */
74+
0x44, 0x0f, 0x28, 0x71, 0x60, /* movaps xmm14,[rcx+ 96] */
75+
0x44, 0x0f, 0x28, 0x79, 0x70, /* movaps xmm15,[rcx+112] */
76+
#endif
77+
0xff, 0xe0, /* jmp rax */
7578
};
7679

7780
#include <windows.h>
7881

79-
void co_init(void)
82+
static void co_init(void)
8083
{
8184
DWORD old_privileges;
8285
VirtualProtect(co_swap_function,
8386
sizeof(co_swap_function), PAGE_EXECUTE_READWRITE, &old_privileges);
8487
}
8588
#else
86-
//ABI: SystemV
89+
/* ABI: SystemV */
8790
#ifndef CO_USE_INLINE_ASM
8891
static unsigned char co_swap_function[] = {
8992
0x48, 0x89, 0x26, /* mov [rsi],rsp */
@@ -107,37 +110,15 @@ static unsigned char co_swap_function[] = {
107110
#include <unistd.h>
108111
#include <sys/mman.h>
109112

110-
void co_init(void)
113+
static void co_init(void)
111114
{
112115
unsigned long long addr = (unsigned long long)co_swap_function;
113116
unsigned long long base = addr - (addr % sysconf(_SC_PAGESIZE));
114117
unsigned long long size = (addr - base) + sizeof(co_swap_function);
115118
mprotect((void*)base, size, PROT_READ | PROT_WRITE | PROT_EXEC);
116119
}
117120
#else
118-
__asm__(
119-
".intel_syntax noprefix\n"
120-
".globl co_swap \n"
121-
"co_swap: \n"
122-
".globl _co_swap \n" /* OSX ABI is different from Linux. */
123-
"_co_swap: \n"
124-
"mov [rsi],rsp \n"
125-
"mov [rsi+0x08],rbp \n"
126-
"mov [rsi+0x10],rbx \n"
127-
"mov [rsi+0x18],r12 \n"
128-
"mov [rsi+0x20],r13 \n"
129-
"mov [rsi+0x28],r14 \n"
130-
"mov [rsi+0x30],r15 \n"
131-
"mov rsp,[rdi] \n"
132-
"mov rbp,[rdi+0x08] \n"
133-
"mov rbx,[rdi+0x10] \n"
134-
"mov r12,[rdi+0x18] \n"
135-
"mov r13,[rdi+0x20] \n"
136-
"mov r14,[rdi+0x28] \n"
137-
"mov r15,[rdi+0x30] \n"
138-
"ret \n"
139-
".att_syntax \n"
140-
);
121+
static void co_init(void) {}
141122
#endif
142123
#endif
143124

@@ -170,27 +151,72 @@ cothread_t co_create(unsigned int size, void (*entrypoint)(void))
170151
size += 512; /* allocate additional space for storage */
171152
size &= ~15; /* align stack to 16-byte boundary */
172153

154+
#ifdef __GENODE__
155+
if((handle = (cothread_t)genode_alloc_secondary_stack(size)))
156+
{
157+
long long *p = (long long*)((char*)handle); /* OS returns top of stack */
158+
*--p = (long long)crash; /* crash if entrypoint returns */
159+
*--p = (long long)entrypoint; /* start of function */
160+
*(long long*)handle = (long long)p; /* stack pointer */
161+
}
162+
#else
173163
if((handle = (cothread_t)malloc(size)))
174164
{
175165
long long *p = (long long*)((char*)handle + size); /* seek to top of stack */
176166
*--p = (long long)crash; /* crash if entrypoint returns */
177167
*--p = (long long)entrypoint; /* start of function */
178168
*(long long*)handle = (long long)p; /* stack pointer */
179169
}
170+
#endif
180171

181172
return handle;
182173
}
183174

184175
void co_delete(cothread_t handle)
185176
{
177+
#ifdef __GENODE__
178+
genode_free_secondary_stack(handle);
179+
#else
186180
free(handle);
181+
#endif
187182
}
188183

184+
#ifndef CO_USE_INLINE_ASM
189185
void co_switch(cothread_t handle)
190186
{
191187
register cothread_t co_previous_handle = co_active_handle;
192188
co_swap(co_active_handle = handle, co_previous_handle);
193189
}
190+
#else
191+
#ifdef __APPLE__
192+
#define ASM_PREFIX "_"
193+
#else
194+
#define ASM_PREFIX ""
195+
#endif
196+
__asm__(
197+
".intel_syntax noprefix \n"
198+
".globl " ASM_PREFIX "co_switch \n"
199+
ASM_PREFIX "co_switch: \n"
200+
"mov rsi, [rip+" ASM_PREFIX "co_active_handle]\n"
201+
"mov [rsi],rsp \n"
202+
"mov [rsi+0x08],rbp \n"
203+
"mov [rsi+0x10],rbx \n"
204+
"mov [rsi+0x18],r12 \n"
205+
"mov [rsi+0x20],r13 \n"
206+
"mov [rsi+0x28],r14 \n"
207+
"mov [rsi+0x30],r15 \n"
208+
"mov [rip+" ASM_PREFIX "co_active_handle], rdi\n"
209+
"mov rsp,[rdi] \n"
210+
"mov rbp,[rdi+0x08] \n"
211+
"mov rbx,[rdi+0x10] \n"
212+
"mov r12,[rdi+0x18] \n"
213+
"mov r13,[rdi+0x20] \n"
214+
"mov r14,[rdi+0x28] \n"
215+
"mov r15,[rdi+0x30] \n"
216+
"ret \n"
217+
".att_syntax \n"
218+
);
219+
#endif
194220

195221
#ifdef __cplusplus
196222
}

libco/armeabi.c

+20-10
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@
55
*/
66

77
#define LIBCO_C
8-
#include "libco.h"
8+
#include <libco.h>
99
#include <assert.h>
1010
#include <stdlib.h>
1111
#include <string.h>
@@ -22,26 +22,37 @@ extern "C" {
2222
static thread_local uint32_t co_active_buffer[64];
2323
static thread_local cothread_t co_active_handle;
2424

25-
asm (
25+
__asm__ (
26+
#if defined(__thumb2__)
27+
".thumb\n"
28+
".align 2\n"
29+
".globl co_switch_arm\n"
30+
".globl _co_switch_arm\n"
31+
"co_switch_arm:\n"
32+
"_co_switch_arm:\n"
33+
" mov r3, sp\n"
34+
" stmia r1!, {r4, r5, r6, r7, r8, r9, r10, r11}\n"
35+
" stmia r1!, {r3, lr}\n"
36+
" ldmia r0!, {r4, r5, r6, r7, r8, r9, r10, r11}\n"
37+
" ldmfd r0!, { r3 }\n"
38+
" mov sp, r3\n"
39+
" ldmfd r0!, { r3 }\n"
40+
" mov pc, r3\n"
41+
#else
2642
".arm\n"
2743
".align 4\n"
2844
".globl co_switch_arm\n"
2945
".globl _co_switch_arm\n"
3046
"co_switch_arm:\n"
31-
"_co_switch_arm:\n"
47+
"_co_switch_arm:\n"
3248
" stmia r1!, {r4, r5, r6, r7, r8, r9, r10, r11, sp, lr}\n"
3349
" ldmia r0!, {r4, r5, r6, r7, r8, r9, r10, r11, sp, pc}\n"
50+
#endif
3451
);
3552

3653
/* ASM */
3754
void co_switch_arm(cothread_t handle, cothread_t current);
3855

39-
static void crash(void)
40-
{
41-
/* Called only if cothread_t entrypoint returns. */
42-
assert(0);
43-
}
44-
4556
cothread_t co_create(unsigned int size, void (*entrypoint)(void))
4657
{
4758
size = (size + 1023) & ~1023;
@@ -93,4 +104,3 @@ void co_switch(cothread_t handle)
93104
#ifdef __cplusplus
94105
}
95106
#endif
96-

libco/fiber.c

+14-1
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@
55
*/
66

77
#define LIBCO_C
8-
#include "libco.h"
8+
#include <libco.h>
99
#define WINVER 0x0400
1010
#define _WIN32_WINNT 0x0400
1111
#define WIN32_LEAN_AND_MEAN
@@ -26,7 +26,11 @@ cothread_t co_active(void)
2626
{
2727
if(!co_active_)
2828
{
29+
#if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP)
30+
ConvertThreadToFiberEx(0, FIBER_FLAG_FLOAT_SWITCH);
31+
#else
2932
ConvertThreadToFiber(0);
33+
#endif
3034
co_active_ = GetCurrentFiber();
3135
}
3236
return co_active_;
@@ -36,10 +40,19 @@ cothread_t co_create(unsigned int heapsize, void (*coentry)(void))
3640
{
3741
if(!co_active_)
3842
{
43+
#if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP)
44+
ConvertThreadToFiberEx(0, FIBER_FLAG_FLOAT_SWITCH);
45+
#else
3946
ConvertThreadToFiber(0);
47+
#endif
4048
co_active_ = GetCurrentFiber();
4149
}
50+
51+
#if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP)
52+
return (cothread_t)CreateFiberEx(heapsize, heapsize, FIBER_FLAG_FLOAT_SWITCH, co_thunk, (void*)coentry);
53+
#else
4254
return (cothread_t)CreateFiber(heapsize, co_thunk, (void*)coentry);
55+
#endif
4356
}
4457

4558
void co_delete(cothread_t cothread)

libco/genode.cpp

+29
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,29 @@
1+
/*
2+
libco.genode_secondary_stack (2018-09-15)
3+
author: Emery Hemingway
4+
license: public domain
5+
*/
6+
7+
/* Genode include */
8+
#include <base/thread.h>
9+
10+
/* Libco include */
11+
#include <libco.h>
12+
13+
extern "C"
14+
void *genode_alloc_secondary_stack(unsigned long stack_size)
15+
{
16+
try {
17+
return Genode::Thread::myself()->alloc_secondary_stack("libco", stack_size); }
18+
catch (...) {
19+
Genode::error("libco: failed to allocate ", stack_size, " byte secondary stack");
20+
return nullptr;
21+
}
22+
23+
}
24+
25+
extern "C"
26+
void genode_free_secondary_stack(void *stack)
27+
{
28+
Genode::Thread::myself()->free_secondary_stack(stack);
29+
}

0 commit comments

Comments
 (0)