Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
9a5aa90
Add some clarifications and fixes for fmt syntax
ehuss Jan 12, 2026
4311139
move initialization of omp/ol runtimes into global_ctor/dtor
ZuseZ4 Jan 9, 2026
89d7695
Rename `DynamicQuery` to `QueryVTable`
Zalathar Jan 25, 2026
2c9175d
Rename trait `QueryConfig` to `QueryDispatcher`
Zalathar Jan 25, 2026
c8975a2
Add `extern core` to diagnostic tests
JonathanBrouwer Jan 27, 2026
4ae692d
Update `askama` version to `0.15.2`
GuillaumeGomez Jan 27, 2026
4a0c044
Update stderrs
JonathanBrouwer Jan 27, 2026
7eae36f
Add an early return if handling multiple offload calls
ZuseZ4 Jan 9, 2026
35ce8ab
adjust testcase for new logic
ZuseZ4 Jan 9, 2026
1f11bf6
Leave note to drop tgt_init_all_rtls in the future
ZuseZ4 Jan 13, 2026
83dcfc8
Update `browser-ui-test` version to `0.23.3`
GuillaumeGomez Jan 27, 2026
31d011a
Add FileCheck annotations to simplify_match.rs
AndrewTKent Jan 27, 2026
6165b72
Fix `x fix`, again
jyn514 Jan 14, 2026
a1893d3
Add support for `trait object` types in `type_info` reflection
izagawd Jan 27, 2026
f488671
Rollup merge of #151239 - izagawd:comptime-reflection-dyn-trait-varia…
Zalathar Jan 28, 2026
3d102a7
Rollup merge of #150893 - ZuseZ4:move-un-register-lib, r=oli-obk
Zalathar Jan 28, 2026
a911804
Rollup merge of #151013 - ehuss:fmt-clarification, r=joboet
Zalathar Jan 28, 2026
384a569
Rollup merge of #151666 - Zalathar:query-vtable, r=Kivooeo
Zalathar Jan 28, 2026
54e60dc
Rollup merge of #151738 - JonathanBrouwer:fix, r=Kivooeo
Zalathar Jan 28, 2026
0f63f2a
Rollup merge of #151747 - GuillaumeGomez:update-askama, r=jieyouxu
Zalathar Jan 28, 2026
5c0b064
Rollup merge of #151759 - GuillaumeGomez:update-browser-ui-test, r=Gu…
Zalathar Jan 28, 2026
c6ea8ab
Rollup merge of #151763 - AndrewTKent:add-filecheck-simplify-match, r…
Zalathar Jan 28, 2026
4d2c8e3
Rollup merge of #151766 - ferrocene:jyn/x-fix, r=jieyouxu
Zalathar Jan 28, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 8 additions & 8 deletions Cargo.lock
Original file line number Diff line number Diff line change
Expand Up @@ -184,9 +184,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50"

[[package]]
name = "askama"
version = "0.15.1"
version = "0.15.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bb7125972258312e79827b60c9eb93938334100245081cf701a2dee981b17427"
checksum = "03341eae1125472b0672fbf35cc9aa7b74cd8e0c3d02f02c28a04678f12aaa7a"
dependencies = [
"askama_macros",
"itoa",
Expand All @@ -197,9 +197,9 @@ dependencies = [

[[package]]
name = "askama_derive"
version = "0.15.1"
version = "0.15.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ba5e7259a1580c61571e3116ebaaa01e3c001b2132b17c4cc5c70780ca3e994"
checksum = "461bd78f3da90b5e44eee4272cfb1c4832aa3dcdb6c370aedd3eb253d2b9e3ca"
dependencies = [
"askama_parser",
"basic-toml",
Expand All @@ -214,18 +214,18 @@ dependencies = [

[[package]]
name = "askama_macros"
version = "0.15.1"
version = "0.15.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "236ce20b77cb13506eaf5024899f4af6e12e8825f390bd943c4c37fd8f322e46"
checksum = "ba49fb22ee3074574b8510abd9495d4f0bb9b8f87e8e45ee31e2cee508f7a8e5"
dependencies = [
"askama_derive",
]

[[package]]
name = "askama_parser"
version = "0.15.1"
version = "0.15.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f3c63392767bb2df6aa65a6e1e3b80fd89bb7af6d58359b924c0695620f1512e"
checksum = "7e33eb7484958aaa1f27e9adb556f5d557331cd891bdbb33781bc1f9550b6f6e"
dependencies = [
"rustc-hash 2.1.1",
"serde",
Expand Down
13 changes: 0 additions & 13 deletions compiler/rustc_codegen_llvm/src/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -188,19 +188,6 @@ impl<'a, 'll, CX: Borrow<SCx<'ll>>> GenericBuilder<'a, 'll, CX> {
load
}
}

/// Emits a memset through `LLVMRustBuildMemSet` on this builder.
///
/// `ptr` is the destination, `fill_byte` the value to store, `size` the length argument and
/// `align` the destination alignment, which is handed to LLVM in bytes.
fn memset(&mut self, ptr: &'ll Value, fill_byte: &'ll Value, size: &'ll Value, align: Align) {
    // The FFI entry point takes the alignment as a plain C unsigned integer.
    let align_bytes = align.bytes() as c_uint;
    // SAFETY: NOTE(review) - presumably sound because every value passed here borrows from the
    // same `'ll` LLVM context as `self.llbuilder`; confirm against the FFI declaration.
    unsafe {
        // NOTE(review): the trailing `false` is presumably the "volatile" flag - confirm.
        llvm::LLVMRustBuildMemSet(self.llbuilder, ptr, align_bytes, fill_byte, size, false);
    }
}
}

/// Empty string, to be used where LLVM expects an instruction name, indicating
Expand Down
100 changes: 69 additions & 31 deletions compiler/rustc_codegen_llvm/src/builder/gpu_offload.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,6 @@ pub(crate) struct OffloadGlobals<'ll> {
pub launcher_fn: &'ll llvm::Value,
pub launcher_ty: &'ll llvm::Type,

pub bin_desc: &'ll llvm::Type,

pub kernel_args_ty: &'ll llvm::Type,

pub offload_entry_ty: &'ll llvm::Type,
Expand All @@ -31,8 +29,8 @@ pub(crate) struct OffloadGlobals<'ll> {

pub ident_t_global: &'ll llvm::Value,

pub register_lib: &'ll llvm::Value,
pub unregister_lib: &'ll llvm::Value,
// FIXME(offload): Drop this once we have fully automated our offload compilation pipeline,
// since LLVM will initialize the RTLs for us if it sees GPU kernels being registered.
pub init_rtls: &'ll llvm::Value,
}

Expand All @@ -44,15 +42,6 @@ impl<'ll> OffloadGlobals<'ll> {
let (begin_mapper, _, end_mapper, mapper_fn_ty) = gen_tgt_data_mappers(cx);
let ident_t_global = generate_at_one(cx);

let tptr = cx.type_ptr();
let ti32 = cx.type_i32();
let tgt_bin_desc_ty = vec![ti32, tptr, tptr, tptr];
let bin_desc = cx.type_named_struct("struct.__tgt_bin_desc");
cx.set_struct_body(bin_desc, &tgt_bin_desc_ty, false);

let reg_lib_decl = cx.type_func(&[cx.type_ptr()], cx.type_void());
let register_lib = declare_offload_fn(&cx, "__tgt_register_lib", reg_lib_decl);
let unregister_lib = declare_offload_fn(&cx, "__tgt_unregister_lib", reg_lib_decl);
let init_ty = cx.type_func(&[], cx.type_void());
let init_rtls = declare_offload_fn(cx, "__tgt_init_all_rtls", init_ty);

Expand All @@ -63,20 +52,84 @@ impl<'ll> OffloadGlobals<'ll> {
OffloadGlobals {
launcher_fn,
launcher_ty,
bin_desc,
kernel_args_ty,
offload_entry_ty,
begin_mapper,
end_mapper,
mapper_fn_ty,
ident_t_global,
register_lib,
unregister_lib,
init_rtls,
}
}
}

/// Emits the one-time (un)registration of the offload descriptor for the current module.
///
/// We need to register offload before using it. We should also unregister it once we are done,
/// for good measure. Previously we did so before and after each individual offload intrinsic
/// call, but that comes at a performance cost. The repeated (un)register calls might also confuse
/// the LLVM ompOpt pass, which tries to move operations to a better location. The easiest
/// solution, which we copy from clang, is to just have those two calls once, in the global
/// ctor/dtor section of the final binary.
///
/// Concretely, this generates:
/// - the `.omp_offloading.descriptor` global,
/// - `.omp_offloading.descriptor_reg`, which calls `__tgt_register_lib` and hands
///   `.omp_offloading.descriptor_unreg` to `atexit`,
/// - `.omp_offloading.descriptor_unreg`, which calls `__tgt_unregister_lib`,
/// - an `llvm.global_ctors` entry with priority 101 pointing at the registration function.
pub(crate) fn register_offload<'ll>(cx: &CodegenCx<'ll, '_>) {
    // First we check quickly whether `__tgt_register_lib` is already known to `cx`. If so, we
    // have already done our setup and return early. Shouldn't be needed for correctness.
    let register_lib_name = "__tgt_register_lib";
    if cx.get_function(register_lib_name).is_some() {
        return;
    }

    // Both runtime entry points share the signature `void(ptr)`.
    let reg_lib_decl = cx.type_func(&[cx.type_ptr()], cx.type_void());
    let register_lib = declare_offload_fn(cx, register_lib_name, reg_lib_decl);
    let unregister_lib = declare_offload_fn(cx, "__tgt_unregister_lib", reg_lib_decl);

    // For reference, a fully populated descriptor (presumably what clang emits - confirm) is:
    // @.omp_offloading.descriptor = internal constant %__tgt_bin_desc { i32 1, ptr @.omp_offloading.device_images, ptr @__start_llvm_offload_entries, ptr @__stop_llvm_offload_entries }
    // What we emit here is an empty descriptor:
    // @.omp_offloading.descriptor = internal constant %__tgt_bin_desc { i32 0, ptr null, ptr null, ptr null }
    let ptr_null = cx.const_null(cx.type_ptr());
    let const_struct = cx.const_struct(&[cx.get_const_i32(0), ptr_null, ptr_null, ptr_null], false);
    let omp_descriptor =
        add_global(cx, ".omp_offloading.descriptor", const_struct, InternalLinkage);

    // declare i32 @atexit(ptr)
    let atexit = cx.type_func(&[cx.type_ptr()], cx.type_i32());
    let atexit_fn = declare_offload_fn(cx, "atexit", atexit);

    // The two wrapper functions take no arguments and return nothing. Both get internal linkage
    // and are placed in the `.text.startup` section.
    let desc_ty = cx.type_func(&[], cx.type_void());
    let reg_name = ".omp_offloading.descriptor_reg";
    let unreg_name = ".omp_offloading.descriptor_unreg";
    let desc_reg_fn = declare_offload_fn(cx, reg_name, desc_ty);
    let desc_unreg_fn = declare_offload_fn(cx, unreg_name, desc_ty);
    llvm::set_linkage(desc_reg_fn, InternalLinkage);
    llvm::set_linkage(desc_unreg_fn, InternalLinkage);
    llvm::set_section(desc_reg_fn, c".text.startup");
    llvm::set_section(desc_unreg_fn, c".text.startup");

    // define internal void @.omp_offloading.descriptor_reg() section ".text.startup" {
    // entry:
    //   call void @__tgt_register_lib(ptr @.omp_offloading.descriptor)
    //   %0 = call i32 @atexit(ptr @.omp_offloading.descriptor_unreg)
    //   ret void
    // }
    let bb = Builder::append_block(cx, desc_reg_fn, "entry");
    let mut a = Builder::build(cx, bb);
    a.call(reg_lib_decl, None, None, register_lib, &[omp_descriptor], None, None);
    a.call(atexit, None, None, atexit_fn, &[desc_unreg_fn], None, None);
    a.ret_void();

    // define internal void @.omp_offloading.descriptor_unreg() section ".text.startup" {
    // entry:
    //   call void @__tgt_unregister_lib(ptr @.omp_offloading.descriptor)
    //   ret void
    // }
    let bb = Builder::append_block(cx, desc_unreg_fn, "entry");
    let mut a = Builder::build(cx, bb);
    a.call(reg_lib_decl, None, None, unregister_lib, &[omp_descriptor], None, None);
    a.ret_void();

    // @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 101, ptr @.omp_offloading.descriptor_reg, ptr null }]
    let ctor_entry = cx.const_struct(&[cx.get_const_i32(101), desc_reg_fn, ptr_null], false);
    let arr = cx.const_array(cx.val_ty(ctor_entry), &[ctor_entry]);
    add_global(cx, "llvm.global_ctors", arr, AppendingLinkage);
}

pub(crate) struct OffloadKernelDims<'ll> {
num_workgroups: &'ll Value,
threads_per_block: &'ll Value,
Expand Down Expand Up @@ -487,9 +540,6 @@ pub(crate) fn gen_call_handling<'ll, 'tcx>(
let tgt_decl = offload_globals.launcher_fn;
let tgt_target_kernel_ty = offload_globals.launcher_ty;

// %struct.__tgt_bin_desc = type { i32, ptr, ptr, ptr }
let tgt_bin_desc = offload_globals.bin_desc;

let tgt_kernel_decl = offload_globals.kernel_args_ty;
let begin_mapper_decl = offload_globals.begin_mapper;
let end_mapper_decl = offload_globals.end_mapper;
Expand All @@ -513,12 +563,9 @@ pub(crate) fn gen_call_handling<'ll, 'tcx>(
}

// Step 0)
// %struct.__tgt_bin_desc = type { i32, ptr, ptr, ptr }
// %6 = alloca %struct.__tgt_bin_desc, align 8
unsafe {
llvm::LLVMRustPositionBuilderPastAllocas(&builder.llbuilder, builder.llfn());
}
let tgt_bin_desc_alloca = builder.direct_alloca(tgt_bin_desc, Align::EIGHT, "EmptyDesc");

let ty = cx.type_array(cx.type_ptr(), num_args);
// Baseptr are just the input pointer to the kernel, stored in a local alloca
Expand All @@ -536,7 +583,6 @@ pub(crate) fn gen_call_handling<'ll, 'tcx>(
unsafe {
llvm::LLVMPositionBuilderAtEnd(&builder.llbuilder, bb);
}
builder.memset(tgt_bin_desc_alloca, cx.get_const_i8(0), cx.get_const_i64(32), Align::EIGHT);

// Now we allocate once per function param, a copy to be passed to one of our maps.
let mut vals = vec![];
Expand Down Expand Up @@ -574,15 +620,9 @@ pub(crate) fn gen_call_handling<'ll, 'tcx>(
geps.push(gep);
}

let mapper_fn_ty = cx.type_func(&[cx.type_ptr()], cx.type_void());
let register_lib_decl = offload_globals.register_lib;
let unregister_lib_decl = offload_globals.unregister_lib;
let init_ty = cx.type_func(&[], cx.type_void());
let init_rtls_decl = offload_globals.init_rtls;

// FIXME(offload): Later we want to add them to the wrapper code, rather than our main function.
// call void @__tgt_register_lib(ptr noundef %6)
builder.call(mapper_fn_ty, None, None, register_lib_decl, &[tgt_bin_desc_alloca], None, None);
// call void @__tgt_init_all_rtls()
builder.call(init_ty, None, None, init_rtls_decl, &[], None, None);

Expand Down Expand Up @@ -679,6 +719,4 @@ pub(crate) fn gen_call_handling<'ll, 'tcx>(
num_args,
s_ident_t,
);

builder.call(mapper_fn_ty, None, None, unregister_lib_decl, &[tgt_bin_desc_alloca], None, None);
}
4 changes: 4 additions & 0 deletions compiler/rustc_codegen_llvm/src/common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,10 @@ impl<'ll, CX: Borrow<SCx<'ll>>> GenericCx<'ll, CX> {
/// Returns the value produced by `LLVMConstNull` for the type `t`.
// NOTE(review): going by the name, this is the null (all-zero) constant of `t` - confirm
// against the LLVM-C documentation.
pub(crate) fn const_null(&self, t: &'ll Type) -> &'ll Value {
// SAFETY: NOTE(review) - presumably sound because `t` is a valid type reference for `'ll`;
// confirm against the FFI declaration.
unsafe { llvm::LLVMConstNull(t) }
}

/// Builds a constant struct value from `elts` by delegating to `struct_in_context` with the
/// LLVM context of `self`; the `packed` flag is forwarded unchanged.
pub(crate) fn const_struct(&self, elts: &[&'ll Value], packed: bool) -> &'ll Value {
    let llcx = self.llcx();
    struct_in_context(llcx, elts, packed)
}
}

impl<'ll, 'tcx> ConstCodegenMethods for CodegenCx<'ll, 'tcx> {
Expand Down
5 changes: 4 additions & 1 deletion compiler/rustc_codegen_llvm/src/intrinsic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,9 @@ use tracing::debug;
use crate::abi::FnAbiLlvmExt;
use crate::builder::Builder;
use crate::builder::autodiff::{adjust_activity_to_abi, generate_enzyme_call};
use crate::builder::gpu_offload::{OffloadKernelDims, gen_call_handling, gen_define_handling};
use crate::builder::gpu_offload::{
OffloadKernelDims, gen_call_handling, gen_define_handling, register_offload,
};
use crate::context::CodegenCx;
use crate::declare::declare_raw_fn;
use crate::errors::{
Expand Down Expand Up @@ -1402,6 +1404,7 @@ fn codegen_offload<'ll, 'tcx>(
return;
}
};
register_offload(cx);
let offload_data = gen_define_handling(&cx, &metadata, target_symbol, offload_globals);
gen_call_handling(bx, &offload_data, &args, &types, &metadata, offload_globals, &offload_dims);
}
Expand Down
Loading
Loading