-
Couldn't load subscription status.
- Fork 728
Refactor Orc JIT to enable lazy compilation #974
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
016da8d
b26022a
43663ed
f061a11
5a08e30
017e0a5
37c9b23
60d4ac7
33d0e0f
a51ea94
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2760,6 +2760,90 @@ aot_load_from_aot_file(const uint8 *buf, uint32 size, char *error_buf, | |
| } | ||
|
|
||
| #if WASM_ENABLE_JIT != 0 | ||
| #if WASM_ENABLE_LAZY_JIT != 0 | ||
| /* Orc JIT thread arguments */ | ||
| typedef struct OrcJitThreadArg { | ||
| AOTCompData *comp_data; | ||
| AOTCompContext *comp_ctx; | ||
| AOTModule *module; | ||
| int32 group_idx; | ||
| int32 group_stride; | ||
| } OrcJitThreadArg; | ||
|
|
||
| static bool orcjit_stop_compiling = false; | ||
| static korp_tid orcjit_threads[WASM_LAZY_JIT_COMPILE_THREAD_NUM]; | ||
| static OrcJitThreadArg orcjit_thread_args[WASM_LAZY_JIT_COMPILE_THREAD_NUM]; | ||
|
|
||
| static void * | ||
| orcjit_thread_callback(void *arg) | ||
| { | ||
| LLVMErrorRef error; | ||
| LLVMOrcJITTargetAddress func_addr = 0; | ||
| OrcJitThreadArg *thread_arg = (OrcJitThreadArg *)arg; | ||
| AOTCompData *comp_data = thread_arg->comp_data; | ||
| AOTCompContext *comp_ctx = thread_arg->comp_ctx; | ||
| AOTModule *module = thread_arg->module; | ||
| char func_name[32]; | ||
| int32 i; | ||
|
|
||
| /* Compile wasm functions of this group */ | ||
| for (i = thread_arg->group_idx; i < (int32)comp_data->func_count; | ||
| i += thread_arg->group_stride) { | ||
| if (!module->func_ptrs[i]) { | ||
| snprintf(func_name, sizeof(func_name), "%s%d", AOT_FUNC_PREFIX, i); | ||
| if ((error = LLVMOrcLLJITLookup(comp_ctx->orc_lazyjit, &func_addr, | ||
| func_name))) { | ||
| char *err_msg = LLVMGetErrorMessage(error); | ||
| os_printf("failed to compile orc jit function: %s", err_msg); | ||
| LLVMDisposeErrorMessage(err_msg); | ||
| break; | ||
| } | ||
| /** | ||
| * No need to lock the func_ptr[func_idx] here as it is basic | ||
| * data type, the load/store for it can be finished by one cpu | ||
| * instruction, and there can be only one cpu instruction | ||
| * loading/storing at the same time. | ||
| */ | ||
| module->func_ptrs[i] = (void *)func_addr; | ||
| } | ||
| if (orcjit_stop_compiling) { | ||
| break; | ||
| } | ||
| } | ||
|
|
||
| /* Try to compile functions that haven't been compiled by other threads */ | ||
| for (i = (int32)comp_data->func_count - 1; i > 0; i--) { | ||
| if (orcjit_stop_compiling) { | ||
| break; | ||
| } | ||
| if (!module->func_ptrs[i]) { | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. L2819 和 L2792 会不会遇到多线程反复赋值的情况。比如:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 按照LLVM的说法,LLVMOrcLLJITLookup本身支持并发操作,并且同一个wasm函数只会被编译一次,编译了之后再次lookup会返回同一个指针: 这边是有可能多线程重复对一个指针赋值,但是即使重复赋值,赋的也是一样的值,另外这个应该不会有多线程读写冲突的问题,因为读写的指针类型是基本的数据类型,可以用一条机器指令完成,同一时刻只能总线上只能有一条指令在读或写一个指针。实际测试了很多case,没有发现有问题。参考: |
||
| snprintf(func_name, sizeof(func_name), "%s%d", AOT_FUNC_PREFIX, i); | ||
| if ((error = LLVMOrcLLJITLookup(comp_ctx->orc_lazyjit, &func_addr, | ||
| func_name))) { | ||
| char *err_msg = LLVMGetErrorMessage(error); | ||
| os_printf("failed to compile orc jit function: %s", err_msg); | ||
| LLVMDisposeErrorMessage(err_msg); | ||
| break; | ||
| } | ||
| module->func_ptrs[i] = (void *)func_addr; | ||
| } | ||
| } | ||
|
|
||
| return NULL; | ||
| } | ||
|
|
||
| static void | ||
| orcjit_stop_compile_threads() | ||
| { | ||
| uint32 i; | ||
wenyongh marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| orcjit_stop_compiling = true; | ||
| for (i = 0; i < WASM_LAZY_JIT_COMPILE_THREAD_NUM; i++) { | ||
| os_thread_join(orcjit_threads[i], NULL); | ||
| } | ||
| } | ||
| #endif | ||
|
|
||
| static AOTModule * | ||
| aot_load_from_comp_data(AOTCompData *comp_data, AOTCompContext *comp_ctx, | ||
| char *error_buf, uint32 error_buf_size) | ||
|
|
@@ -2769,13 +2853,6 @@ aot_load_from_comp_data(AOTCompData *comp_data, AOTCompContext *comp_ctx, | |
| char func_name[32]; | ||
| AOTModule *module; | ||
|
|
||
| #if WASM_ENABLE_LAZY_JIT != 0 | ||
| LLVMOrcThreadSafeModuleRef ts_module; | ||
| LLVMOrcJITDylibRef main_dylib; | ||
| LLVMErrorRef error; | ||
| LLVMOrcJITTargetAddress func_addr = 0; | ||
| #endif | ||
|
|
||
| /* Allocate memory for module */ | ||
| if (!(module = | ||
| loader_malloc(sizeof(AOTModule), error_buf, error_buf_size))) { | ||
|
|
@@ -2839,44 +2916,28 @@ aot_load_from_comp_data(AOTCompData *comp_data, AOTCompContext *comp_ctx, | |
| } | ||
|
|
||
| #if WASM_ENABLE_LAZY_JIT != 0 | ||
| bh_assert(comp_ctx->lazy_orcjit); | ||
|
|
||
| main_dylib = LLVMOrcLLLazyJITGetMainJITDylib(comp_ctx->lazy_orcjit); | ||
| if (!main_dylib) { | ||
| set_error_buf(error_buf, error_buf_size, | ||
| "failed to get dynmaic library reference"); | ||
| goto fail3; | ||
| } | ||
| /* Create threads to compile the wasm functions */ | ||
| for (i = 0; i < WASM_LAZY_JIT_COMPILE_THREAD_NUM; i++) { | ||
| orcjit_thread_args[i].comp_data = comp_data; | ||
| orcjit_thread_args[i].comp_ctx = comp_ctx; | ||
| orcjit_thread_args[i].module = module; | ||
| orcjit_thread_args[i].group_idx = (int32)i; | ||
| orcjit_thread_args[i].group_stride = WASM_LAZY_JIT_COMPILE_THREAD_NUM; | ||
| if (os_thread_create(&orcjit_threads[i], orcjit_thread_callback, | ||
| (void *)&orcjit_thread_args[i], | ||
| APP_THREAD_STACK_SIZE_DEFAULT) | ||
| != 0) { | ||
| uint32 j; | ||
|
|
||
| ts_module = LLVMOrcCreateNewThreadSafeModule(comp_ctx->module, | ||
| comp_ctx->ts_context); | ||
| if (!ts_module) { | ||
| set_error_buf(error_buf, error_buf_size, | ||
| "failed to create thread safe module"); | ||
| goto fail3; | ||
| } | ||
|
|
||
| if ((error = LLVMOrcLLLazyJITAddLLVMIRModule(comp_ctx->lazy_orcjit, | ||
| main_dylib, ts_module))) { | ||
| /* | ||
| * If adding the ThreadSafeModule fails then we need to clean it up | ||
| * ourselves. If adding it succeeds the JIT will manage the memory. | ||
| */ | ||
| aot_handle_llvm_errmsg(error_buf, error_buf_size, | ||
| "failed to addIRModule: ", error); | ||
| goto fail4; | ||
| } | ||
|
|
||
| for (i = 0; i < comp_data->func_count; i++) { | ||
| snprintf(func_name, sizeof(func_name), "%s%d", AOT_FUNC_PREFIX, i); | ||
| if ((error = LLVMOrcLLLazyJITLookup(comp_ctx->lazy_orcjit, &func_addr, | ||
| func_name))) { | ||
| aot_handle_llvm_errmsg(error_buf, error_buf_size, | ||
| "cannot lookup: ", error); | ||
| set_error_buf(error_buf, error_buf_size, | ||
| "create orcjit compile thread failed"); | ||
| /* Terminate the threads created */ | ||
| orcjit_stop_compiling = true; | ||
| for (j = 0; j < i; j++) { | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 是否要考虑把 os_thread_join 的循环拿到外面?减少反复 join 的次数? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 这边是当创建线程失败时,去join其它已经创建的线程,不会反复join。移到外面的话,就要join所有线程,相当于等所有线程都编译好再执行,就不是真正的lazy编译了。这边创建了这些线程让它们进行编译,同时主线程还接着往前走调用jit函数,主线程一旦发现有函数没有编译,也会orc lookup编译一下。 |
||
| os_thread_join(orcjit_threads[j], NULL); | ||
| } | ||
| goto fail3; | ||
| } | ||
| module->func_ptrs[i] = (void *)func_addr; | ||
| func_addr = 0; | ||
| } | ||
| #else | ||
| /* Resolve function addresses */ | ||
|
|
@@ -2897,7 +2958,7 @@ aot_load_from_comp_data(AOTCompData *comp_data, AOTCompContext *comp_ctx, | |
| if (size > 0 | ||
| && !(module->func_type_indexes = | ||
| loader_malloc(size, error_buf, error_buf_size))) { | ||
| goto fail3; | ||
| goto fail4; | ||
| } | ||
| for (i = 0; i < comp_data->func_count; i++) | ||
| module->func_type_indexes[i] = comp_data->funcs[i]->func_type_index; | ||
|
|
@@ -2911,6 +2972,29 @@ aot_load_from_comp_data(AOTCompData *comp_data, AOTCompContext *comp_ctx, | |
| < module->import_func_count + module->func_count); | ||
| /* TODO: fix issue that start func cannot be import func */ | ||
| if (comp_data->start_func_index >= module->import_func_count) { | ||
| #if WASM_ENABLE_LAZY_JIT != 0 | ||
| if (!module->func_ptrs[comp_data->start_func_index | ||
| - module->import_func_count]) { | ||
| LLVMErrorRef error; | ||
| LLVMOrcJITTargetAddress func_addr = 0; | ||
|
|
||
| snprintf(func_name, sizeof(func_name), "%s%d", AOT_FUNC_PREFIX, | ||
| comp_data->start_func_index | ||
| - module->import_func_count); | ||
| if ((error = LLVMOrcLLJITLookup(comp_ctx->orc_lazyjit, | ||
| &func_addr, func_name))) { | ||
| char *err_msg = LLVMGetErrorMessage(error); | ||
| set_error_buf_v(error_buf, error_buf_size, | ||
| "failed to compile orc jit function: %s", | ||
| err_msg); | ||
| LLVMDisposeErrorMessage(err_msg); | ||
| goto fail5; | ||
| } | ||
| module->func_ptrs[comp_data->start_func_index | ||
| - module->import_func_count] = | ||
| (void *)func_addr; | ||
| } | ||
| #endif | ||
| module->start_function = | ||
| module->func_ptrs[comp_data->start_func_index | ||
| - module->import_func_count]; | ||
|
|
@@ -2945,10 +3029,16 @@ aot_load_from_comp_data(AOTCompData *comp_data, AOTCompContext *comp_ctx, | |
| return module; | ||
|
|
||
| #if WASM_ENABLE_LAZY_JIT != 0 | ||
| fail4: | ||
| LLVMOrcDisposeThreadSafeModule(ts_module); | ||
| fail5: | ||
| if (module->func_type_indexes) | ||
| wasm_runtime_free(module->func_type_indexes); | ||
| #endif | ||
|
|
||
| fail4: | ||
| #if WASM_ENABLE_LAZY_JIT != 0 | ||
| /* Terminate all threads before free module->func_ptrs */ | ||
| orcjit_stop_compile_threads(); | ||
| #endif | ||
| fail3: | ||
| if (module->func_ptrs) | ||
| wasm_runtime_free(module->func_ptrs); | ||
|
|
@@ -3034,6 +3124,10 @@ void | |
| aot_unload(AOTModule *module) | ||
| { | ||
| #if WASM_ENABLE_JIT != 0 | ||
| #if WASM_ENABLE_LAZY_JIT != 0 | ||
| orcjit_stop_compile_threads(); | ||
| #endif | ||
|
|
||
| if (module->comp_data) | ||
| aot_destroy_comp_data(module->comp_data); | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
可能需要考虑用 volatile 强化线程间共享的变量
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
应该不需要加volatile吧,编译的线程在调用orc lookup编译某个函数后,读这个变量,如果变成true,就不再编译其它函数,退出线程了。即使读到的这个变量还是fasle,最坏情况就是再编译一次下一个函数吧。