From 268ac245734d5a3ef4ce9f9d4876aec3fedaa6e6 Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Thu, 30 Jan 2020 12:49:05 -0800 Subject: [PATCH] alignment: subtly change meaning of datatype_align (#34473) Rather than meaning the actual alignment of the object, this now means the preferred alignment of the object. The actual alignment of any object is the minimum of this preferred alignment and the alignment supported by the runtime allocator. This aligns us with how LLVM treats alignment, and is probably reasonably sensible anyways. Also tries to audit our existing uses of CreateLoad/CreateStore for correctness, and upgrade some to include pointer-types. fixes #32414 --- base/reflection.jl | 14 +++++------ src/array.c | 4 ++-- src/ccall.cpp | 5 ++-- src/cgutils.cpp | 27 +++++++++++++-------- src/codegen.cpp | 54 +++++++++++++++++++++--------------------- src/datatype.c | 24 +++++++------------ src/intrinsics.cpp | 2 +- src/julia.h | 6 ++--- src/llvm-alloc-opt.cpp | 4 ++-- test/vecelement.jl | 13 +++++++--- 10 files changed, 81 insertions(+), 72 deletions(-) diff --git a/base/reflection.jl b/base/reflection.jl index df3368022fc09..4f83dfa500d6a 100644 --- a/base/reflection.jl +++ b/base/reflection.jl @@ -319,8 +319,8 @@ struct DataTypeLayout nfields::UInt32 npointers::UInt32 firstptr::Int32 - alignment::UInt32 - # alignment : 9; + alignment::UInt16 + flags::UInt16 # haspadding : 1; # fielddesc_type : 2; end @@ -335,7 +335,7 @@ function datatype_alignment(dt::DataType) @_pure_meta dt.layout == C_NULL && throw(UndefRefError()) alignment = unsafe_load(convert(Ptr{DataTypeLayout}, dt.layout)).alignment - return Int(alignment & 0x1FF) + return Int(alignment) end # amount of total space taken by T when stored in a container @@ -368,8 +368,8 @@ Can be called on any `isconcretetype`. function datatype_haspadding(dt::DataType) @_pure_meta dt.layout == C_NULL && throw(UndefRefError()) - alignment = unsafe_load(convert(Ptr{DataTypeLayout}, dt.layout)).alignment - return (alignment >> 9) & 1 == 1 + flags = unsafe_load(convert(Ptr{DataTypeLayout}, dt.layout)).flags + return flags & 1 == 1 end """ @@ -397,8 +397,8 @@ See also [`fieldoffset`](@ref). function datatype_fielddesc_type(dt::DataType) @_pure_meta dt.layout == C_NULL && throw(UndefRefError()) - alignment = unsafe_load(convert(Ptr{DataTypeLayout}, dt.layout)).alignment - return (alignment >> 10) & 3 + flags = unsafe_load(convert(Ptr{DataTypeLayout}, dt.layout)).flags + return (flags >> 1) & 3 end """ diff --git a/src/array.c b/src/array.c index ffdf2857bf479..3d660dc78594b 100644 --- a/src/array.c +++ b/src/array.c @@ -318,7 +318,7 @@ JL_DLLEXPORT jl_array_t *jl_ptr_to_array_1d(jl_value_t *atype, void *data, else { align = elsz = sizeof(void*); } - if (((uintptr_t)data) & (align - 1)) + if (((uintptr_t)data) & ((align > JL_HEAP_ALIGNMENT ? JL_HEAP_ALIGNMENT : align) - 1)) jl_exceptionf(jl_argumenterror_type, "unsafe_wrap: pointer %p is not properly aligned to %u bytes", data, align); @@ -385,7 +385,7 @@ JL_DLLEXPORT jl_array_t *jl_ptr_to_array(jl_value_t *atype, void *data, else { align = elsz = sizeof(void*); } - if (((uintptr_t)data) & (align - 1)) + if (((uintptr_t)data) & ((align > JL_HEAP_ALIGNMENT ? JL_HEAP_ALIGNMENT : align) - 1)) jl_exceptionf(jl_argumenterror_type, "unsafe_wrap: pointer %p is not properly aligned to %u bytes", data, align); diff --git a/src/ccall.cpp b/src/ccall.cpp index 7056eb22cd66f..a3d3bc25dcf40 100644 --- a/src/ccall.cpp +++ b/src/ccall.cpp @@ -418,6 +418,7 @@ static Value *llvm_type_rewrite( from = emit_static_alloca(ctx, from_type); to = emit_bitcast(ctx, from, target_type->getPointerTo()); } + // XXX: deal with possible alignment issues ctx.builder.CreateStore(v, from); return ctx.builder.CreateLoad(to); } @@ -514,7 +515,7 @@ static Value *julia_to_native( tbaa_decorate(jvinfo.tbaa, ctx.builder.CreateStore(emit_unbox(ctx, to, jvinfo, jlto), slot)); } else { - emit_memcpy(ctx, slot, jvinfo.tbaa, jvinfo, jl_datatype_size(jlto), jl_datatype_align(jlto)); + emit_memcpy(ctx, slot, jvinfo.tbaa, jvinfo, jl_datatype_size(jlto), julia_alignment(jlto)); } return slot; } @@ -1945,7 +1946,7 @@ jl_cgval_t function_sig_t::emit_a_ccall( assert(rtsz > 0); Value *strct = emit_allocobj(ctx, rtsz, runtime_bt); MDNode *tbaa = jl_is_mutable(rt) ? tbaa_mutab : tbaa_immut; - int boxalign = jl_datatype_align(rt); + int boxalign = julia_alignment(rt); // copy the data from the return value to the new struct const DataLayout &DL = jl_data_layout; auto resultTy = result->getType(); diff --git a/src/cgutils.cpp b/src/cgutils.cpp index fbeabe667a6de..36793bf11a55f 100644 --- a/src/cgutils.cpp +++ b/src/cgutils.cpp @@ -386,8 +386,8 @@ static unsigned julia_alignment(jl_value_t *jt) } assert(jl_is_datatype(jt) && ((jl_datatype_t*)jt)->layout); unsigned alignment = jl_datatype_align(jt); - assert(alignment <= JL_HEAP_ALIGNMENT); - assert(JL_HEAP_ALIGNMENT % alignment == 0); + if (alignment > JL_HEAP_ALIGNMENT) + return JL_HEAP_ALIGNMENT; return alignment; } @@ -594,7 +594,7 @@ static unsigned jl_field_align(jl_datatype_t *dt, size_t i) unsigned al = jl_field_offset(dt, i); al |= 16; al &= -al; - return std::min(al, jl_datatype_align(dt)); + return std::min({al, (unsigned)jl_datatype_align(dt), (unsigned)JL_HEAP_ALIGNMENT}); } static Type *julia_struct_to_llvm(jl_value_t *jt, jl_unionall_t *ua, bool *isboxed, bool llvmcall) @@ -636,7 +636,6 @@ static Type *julia_struct_to_llvm(jl_value_t *jt, jl_unionall_t *ua, bool *isbox if (jst->layout) { assert(isptr == jl_field_isptr(jst, i)); assert((isptr ? sizeof(void*) : fsz + jl_is_uniontype(ty)) == jl_field_size(jst, i)); - assert(al <= jl_field_align(jst, i)); } Type *lty; if (isptr) { @@ -647,8 +646,15 @@ static Type *julia_struct_to_llvm(jl_value_t *jt, jl_unionall_t *ua, bool *isbox lty = T_int8; } else if (jl_is_uniontype(ty)) { - // pick an Integer type size such that alignment will be correct - // and always end with an Int8 (selector byte) + // pick an Integer type size such that alignment will generally be correct, + // and always end with an Int8 (selector byte). + // We may need to insert padding first to get to the right offset + if (al > MAX_ALIGN) { + Type *AlignmentType = ArrayType::get(VectorType::get(T_int8, al), 0); + latypes.push_back(AlignmentType); + al = MAX_ALIGN; + } + assert(al <= jl_field_align(jst, i)); Type *AlignmentType = IntegerType::get(jl_LLVMContext, 8 * al); unsigned NumATy = fsz / al; unsigned remainder = fsz % al; @@ -1212,7 +1218,7 @@ static Value *emit_isconcrete(jl_codectx_t &ctx, Value *typ) { Value *isconcrete; isconcrete = ctx.builder.CreateConstInBoundsGEP1_32(T_int8, emit_bitcast(ctx, decay_derived(typ), T_pint8), offsetof(jl_datatype_t, isconcretetype)); - isconcrete = ctx.builder.CreateLoad(isconcrete, tbaa_const); + isconcrete = ctx.builder.CreateLoad(T_int8, isconcrete, tbaa_const); isconcrete = ctx.builder.CreateTrunc(isconcrete, T_int1); return isconcrete; } @@ -1336,7 +1342,7 @@ static jl_cgval_t typed_load(jl_codectx_t &ctx, Value *ptr, Value *idx_0based, j //} //else { load = ctx.builder.CreateAlignedLoad(data, - isboxed || alignment ? alignment : julia_alignment(jltype), + isboxed || alignment ? alignment : julia_alignment(jltype), false); if (aliasscope) load->setMetadata("alias.scope", aliasscope); @@ -2434,7 +2440,8 @@ static Value *boxed(jl_codectx_t &ctx, const jl_cgval_t &vinfo) static void emit_unionmove(jl_codectx_t &ctx, Value *dest, MDNode *tbaa_dst, const jl_cgval_t &src, Value *skip, bool isVolatile=false) { if (AllocaInst *ai = dyn_cast(dest)) - ctx.builder.CreateStore(UndefValue::get(ai->getAllocatedType()), ai); + // TODO: make this a lifetime_end & dereferencable annotation? + ctx.builder.CreateAlignedStore(UndefValue::get(ai->getAllocatedType()), ai, ai->getAlignment()); if (jl_is_concrete_type(src.typ) || src.constant) { jl_value_t *typ = src.constant ? jl_typeof(src.constant) : src.typ; Type *store_ty = julia_type_to_llvm(typ); @@ -2694,7 +2701,7 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg if (jl_field_isptr(sty, i)) { fval = boxed(ctx, fval_info); if (!init_as_value) - tbaa_decorate(tbaa_stack, ctx.builder.CreateStore(fval, dest)); + tbaa_decorate(tbaa_stack, ctx.builder.CreateAlignedStore(fval, dest, jl_field_align(sty, i))); } else if (jl_is_uniontype(jtype)) { // compute tindex from rhs diff --git a/src/codegen.cpp b/src/codegen.cpp index dad73e3f08030..a5f05b1fcf4ee 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -2590,7 +2590,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, #ifdef _P64 nva = ctx.builder.CreateTrunc(nva, T_int32); #endif - Value *theArgs = ctx.builder.CreateInBoundsGEP(ctx.argArray, ConstantInt::get(T_size, ctx.nReqArgs)); + Value *theArgs = ctx.builder.CreateInBoundsGEP(T_prjlvalue, ctx.argArray, ConstantInt::get(T_size, ctx.nReqArgs)); Value *r = ctx.builder.CreateCall(prepare_call(jlapplygeneric_func), { theF, theArgs, nva }); *ret = mark_julia_type(ctx, r, true, jl_any_type); return true; @@ -2846,13 +2846,13 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, if (load->getPointerOperand() == ctx.slots[ctx.vaSlot].boxroot && ctx.argArray) { Value *valen = emit_n_varargs(ctx); jl_cgval_t va_ary( // fake instantiation of a cgval, in order to call emit_bounds_check - ctx.builder.CreateInBoundsGEP(ctx.argArray, ConstantInt::get(T_size, ctx.nReqArgs)), + ctx.builder.CreateInBoundsGEP(T_prjlvalue, ctx.argArray, ConstantInt::get(T_size, ctx.nReqArgs)), NULL, false, NULL, NULL); Value *idx = emit_unbox(ctx, T_size, fld, (jl_value_t*)jl_long_type); jl_value_t *boundscheck = (nargs == 3 ? argv[3].constant : jl_true); idx = emit_bounds_check(ctx, va_ary, NULL, idx, valen, boundscheck); idx = ctx.builder.CreateAdd(idx, ConstantInt::get(T_size, ctx.nReqArgs)); - Instruction *v = ctx.builder.CreateLoad(ctx.builder.CreateInBoundsGEP(ctx.argArray, idx)); + Instruction *v = ctx.builder.CreateLoad(T_prjlvalue, ctx.builder.CreateInBoundsGEP(ctx.argArray, idx)); // if we know the result type of this load, we will mark that information here too tbaa_decorate(tbaa_value, maybe_mark_load_dereferenceable(v, false, rt)); *ret = mark_julia_type(ctx, v, /*boxed*/ true, rt); @@ -2994,8 +2994,8 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, Value *idx = emit_unbox(ctx, T_size, fld, (jl_value_t*)jl_long_type); jl_value_t *boundscheck = (nargs == 3 ? argv[3].constant : jl_true); emit_bounds_check(ctx, typ, (jl_value_t*)jl_datatype_type, idx, types_len, boundscheck); - Value *fieldtyp_p = ctx.builder.CreateInBoundsGEP(decay_derived(emit_bitcast(ctx, types_svec, T_pprjlvalue)), idx); - Value *fieldtyp = tbaa_decorate(tbaa_const, ctx.builder.CreateLoad(fieldtyp_p)); + Value *fieldtyp_p = ctx.builder.CreateInBoundsGEP(T_prjlvalue, decay_derived(emit_bitcast(ctx, types_svec, T_pprjlvalue)), idx); + Value *fieldtyp = tbaa_decorate(tbaa_const, ctx.builder.CreateLoad(T_prjlvalue, fieldtyp_p)); *ret = mark_julia_type(ctx, fieldtyp, true, (jl_value_t*)jl_type_type); return true; } @@ -3009,7 +3009,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, if (sty == jl_string_type || sty == jl_simplevector_type) { // String and SimpleVector's length fields have the same layout auto ptr = emit_bitcast(ctx, boxed(ctx, obj), T_psize); - Value *len = tbaa_decorate(tbaa_mutab, ctx.builder.CreateLoad(ptr)); + Value *len = tbaa_decorate(tbaa_mutab, ctx.builder.CreateLoad(T_size, ptr)); if (sty == jl_simplevector_type) { len = ctx.builder.CreateMul(len, ConstantInt::get(T_size, sizeof(void*))); len = ctx.builder.CreateAdd(len, ConstantInt::get(T_size, sizeof(void*))); @@ -3424,7 +3424,7 @@ static Value *global_binding_pointer(jl_codectx_t &ctx, jl_module_t *m, jl_sym_t GlobalVariable *bindinggv = new GlobalVariable(*ctx.f->getParent(), T_pjlvalue, false, GlobalVariable::InternalLinkage, initnul, name.str()); - Value *cachedval = ctx.builder.CreateLoad(bindinggv); + Value *cachedval = ctx.builder.CreateLoad(T_pjlvalue, bindinggv); BasicBlock *have_val = BasicBlock::Create(jl_LLVMContext, "found"), *not_found = BasicBlock::Create(jl_LLVMContext, "notfound"); BasicBlock *currentbb = ctx.builder.GetInsertBlock(); @@ -3476,7 +3476,7 @@ static jl_cgval_t emit_sparam(jl_codectx_t &ctx, size_t i) T_prjlvalue, ctx.spvals_ptr, i + sizeof(jl_svec_t) / sizeof(jl_value_t*)); - Value *sp = tbaa_decorate(tbaa_const, ctx.builder.CreateLoad(bp)); + Value *sp = tbaa_decorate(tbaa_const, ctx.builder.CreateLoad(T_prjlvalue, bp)); Value *isnull = ctx.builder.CreateICmpNE(emit_typeof(ctx, sp), maybe_decay_untracked(literal_pointer_val(ctx, (jl_value_t*)jl_tvar_type))); jl_unionall_t *sparam = (jl_unionall_t*)ctx.linfo->def.method->sig; @@ -3499,7 +3499,7 @@ static jl_cgval_t emit_global(jl_codectx_t &ctx, jl_sym_t *sym) // double-check that a global variable is actually defined. this // can be a problem in parallel when a definition is missing on // one machine. - return mark_julia_type(ctx, tbaa_decorate(tbaa_binding, ctx.builder.CreateLoad(bp)), true, jl_any_type); + return mark_julia_type(ctx, tbaa_decorate(tbaa_binding, ctx.builder.CreateLoad(T_prjlvalue, bp)), true, jl_any_type); } return emit_checked_var(ctx, bp, sym, false, tbaa_binding); } @@ -3514,15 +3514,15 @@ static jl_cgval_t emit_isdefined(jl_codectx_t &ctx, jl_value_t *sym) return mark_julia_const(jl_true); if (vi.boxroot == NULL || vi.pTIndex != NULL) { assert(vi.defFlag); - isnull = ctx.builder.CreateLoad(vi.defFlag, vi.isVolatile); + isnull = ctx.builder.CreateLoad(T_int1, vi.defFlag, vi.isVolatile); } if (vi.boxroot != NULL) { - Value *boxed = ctx.builder.CreateLoad(vi.boxroot, vi.isVolatile); + Value *boxed = ctx.builder.CreateLoad(T_prjlvalue, vi.boxroot, vi.isVolatile); Value *box_isnull = ctx.builder.CreateICmpNE(boxed, maybe_decay_untracked(V_null)); if (vi.pTIndex) { // value is either boxed in the stack slot, or unboxed in value // as indicated by testing (pTIndex & 0x80) - Value *tindex = ctx.builder.CreateLoad(vi.pTIndex, vi.isVolatile); + Value *tindex = ctx.builder.CreateLoad(T_int8, vi.pTIndex, vi.isVolatile); Value *load_unbox = ctx.builder.CreateICmpEQ( ctx.builder.CreateAnd(tindex, ConstantInt::get(T_int8, 0x80)), ConstantInt::get(T_int8, 0)); @@ -3547,7 +3547,7 @@ static jl_cgval_t emit_isdefined(jl_codectx_t &ctx, jl_value_t *sym) T_prjlvalue, ctx.spvals_ptr, i + sizeof(jl_svec_t) / sizeof(jl_value_t*)); - Value *sp = tbaa_decorate(tbaa_const, ctx.builder.CreateLoad(bp)); + Value *sp = tbaa_decorate(tbaa_const, ctx.builder.CreateLoad(T_prjlvalue, bp)); isnull = ctx.builder.CreateICmpNE(emit_typeof(ctx, sp), maybe_decay_untracked(literal_pointer_val(ctx, (jl_value_t*)jl_tvar_type))); } @@ -3601,8 +3601,8 @@ static jl_cgval_t emit_varinfo(jl_codectx_t &ctx, jl_varinfo_t &vi, jl_sym_t *va AllocaInst *ssaslot = cast(varslot->clone()); ssaslot->insertAfter(varslot); if (vi.isVolatile) { - Value *unbox = ctx.builder.CreateLoad(vi.value.V, true); - ctx.builder.CreateStore(unbox, ssaslot); + Value *unbox = ctx.builder.CreateAlignedLoad(ssaslot->getAllocatedType(), varslot, varslot->getAlignment(), true); + ctx.builder.CreateAlignedStore(unbox, ssaslot, ssaslot->getAlignment()); } else { const DataLayout &DL = jl_data_layout; @@ -3611,18 +3611,18 @@ static jl_cgval_t emit_varinfo(jl_codectx_t &ctx, jl_varinfo_t &vi, jl_sym_t *va } Value *tindex = NULL; if (vi.pTIndex) - tindex = ctx.builder.CreateLoad(vi.pTIndex, vi.isVolatile); + tindex = ctx.builder.CreateLoad(T_int8, vi.pTIndex, vi.isVolatile); v = mark_julia_slot(ssaslot, vi.value.typ, tindex, tbaa_stack); } if (vi.boxroot == NULL) v = update_julia_type(ctx, v, typ); if (vi.usedUndef) { assert(vi.defFlag); - isnull = ctx.builder.CreateLoad(vi.defFlag, vi.isVolatile); + isnull = ctx.builder.CreateLoad(T_int1, vi.defFlag, vi.isVolatile); } } if (vi.boxroot != NULL) { - Instruction *boxed = ctx.builder.CreateLoad(vi.boxroot, vi.isVolatile); + Instruction *boxed = ctx.builder.CreateLoad(T_prjlvalue, vi.boxroot, vi.isVolatile); Value *box_isnull; if (vi.usedUndef) box_isnull = ctx.builder.CreateICmpNE(boxed, maybe_decay_untracked(V_null)); @@ -3714,7 +3714,7 @@ static void emit_vi_assignment_unboxed(jl_codectx_t &ctx, jl_varinfo_t &vi, Valu if (vi.value.V != rval_info.V) { Value *copy_bytes = ConstantInt::get(T_int32, jl_datatype_size(vi.value.typ)); emit_memcpy(ctx, vi.value.V, tbaa_stack, rval_info, copy_bytes, - jl_datatype_align(rval_info.typ), vi.isVolatile); + julia_alignment(rval_info.typ), vi.isVolatile); } } else { @@ -3803,12 +3803,12 @@ static void emit_phinode_assign(jl_codectx_t &ctx, ssize_t idx, jl_value_t *r) dest = emit_static_alloca(ctx, vtype); Value *phi = emit_static_alloca(ctx, vtype); #if JL_LLVM_VERSION >= 70000 - ctx.builder.CreateMemCpy(phi, jl_datatype_align(phiType), + ctx.builder.CreateMemCpy(phi, julia_alignment(phiType), dest, 0, jl_datatype_size(phiType), false); #else ctx.builder.CreateMemCpy(phi, dest, jl_datatype_size(phiType), - jl_datatype_align(phiType), false); + julia_alignment(phiType), false); #endif ctx.builder.CreateLifetimeEnd(dest); slot = mark_julia_slot(phi, phiType, NULL, tbaa_stack); @@ -4663,7 +4663,7 @@ static Function* gen_cfun_wrapper( if (aref) { if (jargty == (jl_value_t*)jl_any_type) { inputarg = mark_julia_type(ctx, - ctx.builder.CreateLoad(emit_bitcast(ctx, val, T_pprjlvalue)), + ctx.builder.CreateLoad(T_prjlvalue, emit_bitcast(ctx, val, T_pprjlvalue)), true, jl_any_type); } else if (static_at && jl_is_concrete_immutable(jargty)) { // anything that could be stored unboxed @@ -5325,7 +5325,7 @@ static Function *gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlret } else { Value *argPtr = ctx.builder.CreateConstInBoundsGEP1_32(T_prjlvalue, argArray, i - 1); - theArg = maybe_mark_load_dereferenceable(ctx.builder.CreateLoad(argPtr), false, ty); + theArg = maybe_mark_load_dereferenceable(ctx.builder.CreateLoad(T_prjlvalue, argPtr), false, ty); } if (!isboxed) { theArg = decay_derived(emit_bitcast(ctx, theArg, PointerType::get(lty, 0))); @@ -5345,7 +5345,7 @@ static Function *gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlret if (retarg == 0) theArg = funcArg; else - theArg = ctx.builder.CreateLoad(ctx.builder.CreateConstInBoundsGEP1_32(T_prjlvalue, argArray, retarg - 1)); + theArg = ctx.builder.CreateLoad(T_prjlvalue, ctx.builder.CreateConstInBoundsGEP1_32(T_prjlvalue, argArray, retarg - 1)); retval = mark_julia_type(ctx, theArg, true, jl_any_type); } else { @@ -6120,8 +6120,8 @@ static std::unique_ptr emit_function( theArg = mark_julia_type(ctx, fArg, true, vi.value.typ); } else { - Value *argPtr = ctx.builder.CreateInBoundsGEP(argArray, ConstantInt::get(T_size, i-1)); - auto load = maybe_mark_load_dereferenceable(ctx.builder.CreateLoad(argPtr), + Value *argPtr = ctx.builder.CreateInBoundsGEP(T_prjlvalue, argArray, ConstantInt::get(T_size, i-1)); + auto load = maybe_mark_load_dereferenceable(ctx.builder.CreateLoad(T_prjlvalue, argPtr), false, vi.value.typ); theArg = mark_julia_type(ctx, load, true, vi.value.typ); if (ctx.debug_enabled && vi.dinfo && !vi.boxroot && !vi.value.V) { @@ -6198,7 +6198,7 @@ static std::unique_ptr emit_function( restTuple = ctx.builder.CreateCall(prepare_call(jltuple_func), { maybe_decay_untracked(V_null), - ctx.builder.CreateInBoundsGEP(argArray, + ctx.builder.CreateInBoundsGEP(T_prjlvalue, argArray, ConstantInt::get(T_size, nreq - 1)), ctx.builder.CreateSub(argCount, ConstantInt::get(T_int32, nreq - 1)) }); diff --git a/src/datatype.c b/src/datatype.c index 3f511ae8d9723..779d0a30c98af 100644 --- a/src/datatype.c +++ b/src/datatype.c @@ -105,6 +105,8 @@ static jl_datatype_layout_t *jl_get_layout(uint32_t nfields, jl_fielddesc32_t desc[], uint32_t pointers[]) JL_NOTSAFEPOINT { + assert(alignment); // should have been verified by caller + // compute the smallest fielddesc type that can hold the layout description int fielddesc_type = 0; if (nfields > 0) { @@ -191,7 +193,7 @@ unsigned jl_special_vector_alignment(size_t nfields, jl_value_t *t) { if (!jl_is_vecelement_type(t)) return 0; - assert(jl_datatype_nfields(t)==1); + assert(jl_datatype_nfields(t) == 1); jl_value_t *ty = jl_field_type((jl_datatype_t*)t, 0); if (!jl_is_primitivetype(ty)) // LLVM requires that a vector element be a primitive type. @@ -199,16 +201,12 @@ unsigned jl_special_vector_alignment(size_t nfields, jl_value_t *t) // motivating use case comes up for Julia, we reject pointers. return 0; size_t elsz = jl_datatype_size(ty); - if (elsz>8 || (1<alignment; alignment*=2 ) - continue; - return alignment; + size_t size = nfields * elsz; + // Use natural alignment for this vector: this matches LLVM and clang. + return next_power_of_two(size); } STATIC_INLINE int jl_is_datatype_make_singleton(jl_datatype_t *d) @@ -452,7 +450,6 @@ void jl_compute_field_offsets(jl_datatype_t *st) haspadding = 1; } } - assert(al <= JL_HEAP_ALIGNMENT && (JL_HEAP_ALIGNMENT % al) == 0); if (al != 0) { size_t alsz = LLT_ALIGN(sz, al); if (sz & (al - 1)) @@ -472,10 +469,7 @@ void jl_compute_field_offsets(jl_datatype_t *st) // Some tuples become LLVM vectors with stronger alignment than what was calculated above. unsigned al = jl_special_vector_alignment(nfields, firstty); assert(al % alignm == 0); - // JL_HEAP_ALIGNMENT is the biggest alignment we can guarantee on the heap. - if (al > JL_HEAP_ALIGNMENT) - alignm = JL_HEAP_ALIGNMENT; - else if (al) + if (al > alignm) alignm = al; } st->size = LLT_ALIGN(sz, alignm); diff --git a/src/intrinsics.cpp b/src/intrinsics.cpp index 90a2ff695ae77..c7175b014ebbf 100644 --- a/src/intrinsics.cpp +++ b/src/intrinsics.cpp @@ -341,7 +341,7 @@ static Value *emit_unbox(jl_codectx_t &ctx, Type *to, const jl_cgval_t &x, jl_va Type *dest_ty = unboxed->getType()->getPointerTo(); if (dest->getType() != dest_ty) dest = emit_bitcast(ctx, dest, dest_ty); - tbaa_decorate(tbaa_dest, ctx.builder.CreateStore(unboxed, dest)); + tbaa_decorate(tbaa_dest, ctx.builder.CreateAlignedStore(unboxed, dest, julia_alignment(jt))); return NULL; } diff --git a/src/julia.h b/src/julia.h index 647ee82be932f..e4e6403945b95 100644 --- a/src/julia.h +++ b/src/julia.h @@ -428,9 +428,9 @@ typedef struct { uint32_t nfields; uint32_t npointers; // number of pointers embedded inside int32_t first_ptr; // index of the first pointer (or -1) - uint32_t alignment : 9; // strictest alignment over all fields - uint32_t haspadding : 1; // has internal undefined bytes - uint32_t fielddesc_type : 2; // 0 -> 8, 1 -> 16, 2 -> 32 + uint16_t alignment; // strictest alignment over all fields + uint16_t haspadding : 1; // has internal undefined bytes + uint16_t fielddesc_type : 2; // 0 -> 8, 1 -> 16, 2 -> 32 // union { // jl_fielddesc8_t field8[nfields]; // jl_fielddesc16_t field16[nfields]; diff --git a/src/llvm-alloc-opt.cpp b/src/llvm-alloc-opt.cpp index fd45665791389..46cbb3c64f41f 100644 --- a/src/llvm-alloc-opt.cpp +++ b/src/llvm-alloc-opt.cpp @@ -938,8 +938,8 @@ void Optimizer::moveToStack(CallInst *orig_inst, size_t sz, bool has_ref) // SSA from it are live when we run the allocation again. // It is now safe to promote the allocation to an entry block alloca. size_t align = 1; - // TODO make codegen handling of alignment consistent and pass that as a parameter - // to the allocation function directly. + // TODO: This is overly conservative. May want to instead pass this as a + // parameter to the allocation function directly. if (sz > 1) align = MinAlign(JL_SMALL_BYTE_ALIGNMENT, NextPowerOf2(sz)); // No debug info for prolog instructions diff --git a/test/vecelement.jl b/test/vecelement.jl index e6caa10cd814d..5652ea10d3aa6 100644 --- a/test/vecelement.jl +++ b/test/vecelement.jl @@ -3,7 +3,7 @@ make_value(::Type{T}, i::Integer) where {T<:Integer} = 3*i%T make_value(::Type{T},i::Integer) where {T<:AbstractFloat} = T(3*i) -Vec{N,T} = NTuple{N,Base.VecElement{T}} +const Vec{N,T} = NTuple{N,Base.VecElement{T}} # Crash report for #15244 motivated this test. @generated function thrice_iota(::Type{Vec{N,T}}) where {N,T} @@ -73,6 +73,13 @@ end @test isa(VecElement((1,2)), VecElement{Tuple{Int,Int}}) +# test for alignment agreement (#32414) +@noinline function bar32414(a) + v = ntuple(w -> VecElement(Float64(10w)), Val(8)) + return a, (v, (a, (1e6, 1e9))) +end +@test bar32414(-35.0) === (-35.0, ((VecElement(10.0), VecElement(20.0), VecElement(30.0), VecElement(40.0), VecElement(50.0), VecElement(60.0), VecElement(70.0), VecElement(80.0)), (-35.0, (1.0e6, 1.0e9)))) + # The following test mimic SIMD.jl const _llvmtypes = Dict{DataType, String}( Float64 => "double", @@ -105,8 +112,8 @@ end # Test various SIMD Vectors with known good sizes for T in (Float64, Float32, Int64, Int32) for N in 1:36 - a = ntuple(i->VecElement(T(i)), N) - result = ntuple(i-> VecElement(T(i+i)), N) + a = ntuple(i -> VecElement(T(i)), N) + result = ntuple(i -> VecElement(T(i+i)), N) b = vecadd(a, a) @test b == result b = f20961([a], [a])