From 76efd1992ec1422f5d25a21c3f3b4099cab0f490 Mon Sep 17 00:00:00 2001 From: Yichao Yu Date: Mon, 24 Aug 2020 20:35:11 -0400 Subject: [PATCH] Fix TBAA for `DataType` Due to lazy field type initialization, the `types` field is actually mutable in `DataType`. This means that we cannot use immutable or constant TBAA for all `DataType` fields. However, since any objects stored in the field are still rooted over the lifetime of the `DataType` we can still let the GC root placement pass refine any field to the parent `DataType`. Also make use of the field offset to still mark known load from all other fields as constant. This is currently done via a special case but it could also be useful for many other types too. --- src/cgutils.cpp | 13 ++++++++----- src/codegen.cpp | 13 ++++++++++--- src/llvm-late-gc-lowering.cpp | 5 +++-- test/core.jl | 18 ++++++++++++++++++ 4 files changed, 39 insertions(+), 10 deletions(-) diff --git a/src/cgutils.cpp b/src/cgutils.cpp index 8a6204c54a3e5..8d8143bb8889b 100644 --- a/src/cgutils.cpp +++ b/src/cgutils.cpp @@ -1667,6 +1667,9 @@ static jl_cgval_t emit_getfield_knownidx(jl_codectx_t &ctx, const jl_cgval_t &st return ghostValue(jfty); bool maybe_null = idx >= (unsigned)jt->ninitialized; size_t byte_offset = jl_field_offset(jt, idx); + auto tbaa = strct.tbaa; + if (tbaa == tbaa_datatype && byte_offset != offsetof(jl_datatype_t, types)) + tbaa = tbaa_const; if (strct.ispointer()) { Value *staddr = maybe_decay_tracked(ctx, data_pointer(ctx, strct)); bool isboxed; @@ -1699,7 +1702,7 @@ static jl_cgval_t emit_getfield_knownidx(jl_codectx_t &ctx, const jl_cgval_t &st LoadInst *Load = ctx.builder.CreateAlignedLoad(T_prjlvalue, maybe_bitcast(ctx, addr, T_pprjlvalue), Align(sizeof(void*))); Load->setOrdering(AtomicOrdering::Unordered); maybe_mark_load_dereferenceable(Load, maybe_null, jl_field_type(jt, idx)); - Value *fldv = tbaa_decorate(strct.tbaa, Load); + Value *fldv = tbaa_decorate(tbaa, Load); if (maybe_null) null_pointer_check(ctx, fldv); return mark_julia_type(ctx, fldv, true, jfty); @@ -1726,17 +1729,17 @@ static jl_cgval_t emit_getfield_knownidx(jl_codectx_t &ctx, const jl_cgval_t &st Type *ET = IntegerType::get(jl_LLVMContext, 8 * al); AllocaInst *lv = emit_static_alloca(ctx, ET); lv->setOperand(0, ConstantInt::get(T_int32, (fsz + al - 1) / al)); - emit_memcpy(ctx, lv, strct.tbaa, addr, strct.tbaa, fsz, al); + emit_memcpy(ctx, lv, tbaa, addr, tbaa, fsz, al); addr = lv; } - return mark_julia_slot(addr, jfty, tindex, strct.tbaa); + return mark_julia_slot(addr, jfty, tindex, tbaa); } else if (!jt->mutabl && !(maybe_null && jfty == (jl_value_t*)jl_bool_type)) { // just compute the pointer and let user load it when necessary - return mark_julia_slot(addr, jfty, NULL, strct.tbaa); + return mark_julia_slot(addr, jfty, NULL, tbaa); } unsigned align = jl_field_align(jt, idx); - return typed_load(ctx, addr, NULL, jfty, strct.tbaa, nullptr, true, align); + return typed_load(ctx, addr, NULL, jfty, tbaa, nullptr, true, align); } else if (isa(strct.V)) { return jl_cgval_t(); diff --git a/src/codegen.cpp b/src/codegen.cpp index e0cb4a4f02236..69a3875655ed4 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -210,6 +210,7 @@ static MDNode *tbaa_data; // Any user data that `pointerset/ref` are allow static MDNode *tbaa_binding; // jl_binding_t::value static MDNode *tbaa_value; // jl_value_t, that is not jl_array_t static MDNode *tbaa_mutab; // mutable type +static MDNode *tbaa_datatype; // datatype static MDNode *tbaa_immut; // immutable type static MDNode *tbaa_ptrarraybuf; // Data in an array of boxed values static MDNode *tbaa_arraybuf; // Data in an array of POD @@ -869,7 +870,7 @@ static MDNode *best_tbaa(jl_value_t *jt) { jt = jl_unwrap_unionall(jt); if (jt == (jl_value_t*)jl_datatype_type || (jl_is_type_type(jt) && jl_is_datatype(jl_tparam0(jt)))) - return tbaa_const; + return tbaa_datatype; if (!jl_is_datatype(jt)) return tbaa_value; if (jl_is_abstracttype(jt)) @@ -3142,6 +3143,9 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, else if (jl_field_isptr(stt, fieldidx) || jl_type_hasptr(jl_field_type(stt, fieldidx))) { Value *fldv; size_t offs = jl_field_offset(stt, fieldidx) / sizeof(jl_value_t*); + auto tbaa = obj.tbaa; + if (tbaa == tbaa_datatype && offs != offsetof(jl_datatype_t, types)) + tbaa = tbaa_const; if (obj.ispointer()) { if (!jl_field_isptr(stt, fieldidx)) offs += ((jl_datatype_t*)jl_field_type(stt, fieldidx))->layout->first_ptr; @@ -3149,7 +3153,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, Value *addr = ctx.builder.CreateConstInBoundsGEP1_32(T_prjlvalue, ptr, offs); // emit this using the same type as emit_getfield_knownidx // so that LLVM may be able to load-load forward them and fold the result - fldv = tbaa_decorate(obj.tbaa, ctx.builder.CreateAlignedLoad(T_prjlvalue, addr, Align(sizeof(size_t)))); + fldv = tbaa_decorate(tbaa, ctx.builder.CreateAlignedLoad(T_prjlvalue, addr, Align(sizeof(size_t)))); cast(fldv)->setOrdering(AtomicOrdering::Unordered); } else { @@ -7311,7 +7315,10 @@ static void init_julia_llvm_meta(void) MDNode *tbaa_value_scalar; std::tie(tbaa_value, tbaa_value_scalar) = tbaa_make_child("jtbaa_value", tbaa_data_scalar); - tbaa_mutab = tbaa_make_child("jtbaa_mutab", tbaa_value_scalar).first; + MDNode *tbaa_mutab_scalar; + std::tie(tbaa_mutab, tbaa_mutab_scalar) = + tbaa_make_child("jtbaa_mutab", tbaa_value_scalar); + tbaa_datatype = tbaa_make_child("jtbaa_datatype", tbaa_mutab_scalar).first; tbaa_immut = tbaa_make_child("jtbaa_immut", tbaa_value_scalar).first; tbaa_arraybuf = tbaa_make_child("jtbaa_arraybuf", tbaa_data_scalar).first; tbaa_ptrarraybuf = tbaa_make_child("jtbaa_ptrarraybuf", tbaa_data_scalar).first; diff --git a/src/llvm-late-gc-lowering.cpp b/src/llvm-late-gc-lowering.cpp index e697233dae149..6c0bc5ede0f7e 100644 --- a/src/llvm-late-gc-lowering.cpp +++ b/src/llvm-late-gc-lowering.cpp @@ -1116,7 +1116,7 @@ static bool isLoadFromImmut(LoadInst *LI) if (LI->getMetadata(LLVMContext::MD_invariant_load)) return true; MDNode *TBAA = LI->getMetadata(LLVMContext::MD_tbaa); - if (isTBAA(TBAA, {"jtbaa_immut", "jtbaa_const"})) + if (isTBAA(TBAA, {"jtbaa_immut", "jtbaa_const", "jtbaa_datatype"})) return true; return false; } @@ -1185,7 +1185,8 @@ static bool isLoadFromConstGV(LoadInst *LI, bool &task_local) // but LLVM global merging can change the pointer operands to GEPs/bitcasts auto load_base = LI->getPointerOperand()->stripInBoundsOffsets(); auto gv = dyn_cast(load_base); - if (isTBAA(LI->getMetadata(LLVMContext::MD_tbaa), {"jtbaa_immut", "jtbaa_const"})) { + if (isTBAA(LI->getMetadata(LLVMContext::MD_tbaa), + {"jtbaa_immut", "jtbaa_const", "jtbaa_datatype"})) { if (gv) return true; return isLoadFromConstGV(load_base, task_local); diff --git a/test/core.jl b/test/core.jl index 19f50d9671f95..48c3546029631 100644 --- a/test/core.jl +++ b/test/core.jl @@ -7270,3 +7270,21 @@ primitive type P36104 8 end # Malformed invoke f_bad_invoke(x::Int) = invoke(x, (Any,), x) @test_throws TypeError f_bad_invoke(1) + +# Fixup for #37044, make sure mutation of `types` field of `DataType` is respected. +struct A37044{T1,T2} + x::T1 + y::T2 +end +struct Ref37044 + x::DataType +end +function f37044(r) + t = r.x + if !isdefined(t, :types) + Base.datatype_fieldtypes(t) + end + return t.types +end +r37044 = Ref37044(A37044{Int}.body) +@test f37044(r37044)[1] === Int