diff --git a/base/reflection.jl b/base/reflection.jl index df3368022fc09..4f83dfa500d6a 100644 --- a/base/reflection.jl +++ b/base/reflection.jl @@ -319,8 +319,8 @@ struct DataTypeLayout nfields::UInt32 npointers::UInt32 firstptr::Int32 - alignment::UInt32 - # alignment : 9; + alignment::UInt16 + flags::UInt16 # haspadding : 1; # fielddesc_type : 2; end @@ -335,7 +335,7 @@ function datatype_alignment(dt::DataType) @_pure_meta dt.layout == C_NULL && throw(UndefRefError()) alignment = unsafe_load(convert(Ptr{DataTypeLayout}, dt.layout)).alignment - return Int(alignment & 0x1FF) + return Int(alignment) end # amount of total space taken by T when stored in a container @@ -368,8 +368,8 @@ Can be called on any `isconcretetype`. function datatype_haspadding(dt::DataType) @_pure_meta dt.layout == C_NULL && throw(UndefRefError()) - alignment = unsafe_load(convert(Ptr{DataTypeLayout}, dt.layout)).alignment - return (alignment >> 9) & 1 == 1 + flags = unsafe_load(convert(Ptr{DataTypeLayout}, dt.layout)).flags + return flags & 1 == 1 end """ @@ -397,8 +397,8 @@ See also [`fieldoffset`](@ref). function datatype_fielddesc_type(dt::DataType) @_pure_meta dt.layout == C_NULL && throw(UndefRefError()) - alignment = unsafe_load(convert(Ptr{DataTypeLayout}, dt.layout)).alignment - return (alignment >> 10) & 3 + flags = unsafe_load(convert(Ptr{DataTypeLayout}, dt.layout)).flags + return (flags >> 1) & 3 end """ diff --git a/src/array.c b/src/array.c index ffdf2857bf479..3d660dc78594b 100644 --- a/src/array.c +++ b/src/array.c @@ -318,7 +318,7 @@ JL_DLLEXPORT jl_array_t *jl_ptr_to_array_1d(jl_value_t *atype, void *data, else { align = elsz = sizeof(void*); } - if (((uintptr_t)data) & (align - 1)) + if (((uintptr_t)data) & ((align > JL_HEAP_ALIGNMENT ? JL_HEAP_ALIGNMENT : align) - 1)) jl_exceptionf(jl_argumenterror_type, "unsafe_wrap: pointer %p is not properly aligned to %u bytes", data, align); @@ -385,7 +385,7 @@ JL_DLLEXPORT jl_array_t *jl_ptr_to_array(jl_value_t *atype, void *data, else { align = elsz = sizeof(void*); } - if (((uintptr_t)data) & (align - 1)) + if (((uintptr_t)data) & ((align > JL_HEAP_ALIGNMENT ? JL_HEAP_ALIGNMENT : align) - 1)) jl_exceptionf(jl_argumenterror_type, "unsafe_wrap: pointer %p is not properly aligned to %u bytes", data, align); diff --git a/src/ccall.cpp b/src/ccall.cpp index 7056eb22cd66f..a3d3bc25dcf40 100644 --- a/src/ccall.cpp +++ b/src/ccall.cpp @@ -418,6 +418,7 @@ static Value *llvm_type_rewrite( from = emit_static_alloca(ctx, from_type); to = emit_bitcast(ctx, from, target_type->getPointerTo()); } + // XXX: deal with possible alignment issues ctx.builder.CreateStore(v, from); return ctx.builder.CreateLoad(to); } @@ -514,7 +515,7 @@ static Value *julia_to_native( tbaa_decorate(jvinfo.tbaa, ctx.builder.CreateStore(emit_unbox(ctx, to, jvinfo, jlto), slot)); } else { - emit_memcpy(ctx, slot, jvinfo.tbaa, jvinfo, jl_datatype_size(jlto), jl_datatype_align(jlto)); + emit_memcpy(ctx, slot, jvinfo.tbaa, jvinfo, jl_datatype_size(jlto), julia_alignment(jlto)); } return slot; } @@ -1945,7 +1946,7 @@ jl_cgval_t function_sig_t::emit_a_ccall( assert(rtsz > 0); Value *strct = emit_allocobj(ctx, rtsz, runtime_bt); MDNode *tbaa = jl_is_mutable(rt) ? tbaa_mutab : tbaa_immut; - int boxalign = jl_datatype_align(rt); + int boxalign = julia_alignment(rt); // copy the data from the return value to the new struct const DataLayout &DL = jl_data_layout; auto resultTy = result->getType(); diff --git a/src/cgutils.cpp b/src/cgutils.cpp index fbeabe667a6de..36793bf11a55f 100644 --- a/src/cgutils.cpp +++ b/src/cgutils.cpp @@ -386,8 +386,8 @@ static unsigned julia_alignment(jl_value_t *jt) } assert(jl_is_datatype(jt) && ((jl_datatype_t*)jt)->layout); unsigned alignment = jl_datatype_align(jt); - assert(alignment <= JL_HEAP_ALIGNMENT); - assert(JL_HEAP_ALIGNMENT % alignment == 0); + if (alignment > JL_HEAP_ALIGNMENT) + return JL_HEAP_ALIGNMENT; return alignment; } @@ -594,7 +594,7 @@ static unsigned jl_field_align(jl_datatype_t *dt, size_t i) unsigned al = jl_field_offset(dt, i); al |= 16; al &= -al; - return std::min(al, jl_datatype_align(dt)); + return std::min({al, (unsigned)jl_datatype_align(dt), (unsigned)JL_HEAP_ALIGNMENT}); } static Type *julia_struct_to_llvm(jl_value_t *jt, jl_unionall_t *ua, bool *isboxed, bool llvmcall) @@ -636,7 +636,6 @@ static Type *julia_struct_to_llvm(jl_value_t *jt, jl_unionall_t *ua, bool *isbox if (jst->layout) { assert(isptr == jl_field_isptr(jst, i)); assert((isptr ? sizeof(void*) : fsz + jl_is_uniontype(ty)) == jl_field_size(jst, i)); - assert(al <= jl_field_align(jst, i)); } Type *lty; if (isptr) { @@ -647,8 +646,15 @@ static Type *julia_struct_to_llvm(jl_value_t *jt, jl_unionall_t *ua, bool *isbox lty = T_int8; } else if (jl_is_uniontype(ty)) { - // pick an Integer type size such that alignment will be correct - // and always end with an Int8 (selector byte) + // pick an Integer type size such that alignment will generally be correct, + // and always end with an Int8 (selector byte). + // We may need to insert padding first to get to the right offset + if (al > MAX_ALIGN) { + Type *AlignmentType = ArrayType::get(VectorType::get(T_int8, al), 0); + latypes.push_back(AlignmentType); + al = MAX_ALIGN; + } + assert(al <= jl_field_align(jst, i)); Type *AlignmentType = IntegerType::get(jl_LLVMContext, 8 * al); unsigned NumATy = fsz / al; unsigned remainder = fsz % al; @@ -1212,7 +1218,7 @@ static Value *emit_isconcrete(jl_codectx_t &ctx, Value *typ) { Value *isconcrete; isconcrete = ctx.builder.CreateConstInBoundsGEP1_32(T_int8, emit_bitcast(ctx, decay_derived(typ), T_pint8), offsetof(jl_datatype_t, isconcretetype)); - isconcrete = ctx.builder.CreateLoad(isconcrete, tbaa_const); + isconcrete = ctx.builder.CreateLoad(T_int8, isconcrete, tbaa_const); isconcrete = ctx.builder.CreateTrunc(isconcrete, T_int1); return isconcrete; } @@ -1336,7 +1342,7 @@ static jl_cgval_t typed_load(jl_codectx_t &ctx, Value *ptr, Value *idx_0based, j //} //else { load = ctx.builder.CreateAlignedLoad(data, - isboxed || alignment ? alignment : julia_alignment(jltype), + isboxed || alignment ? alignment : julia_alignment(jltype), false); if (aliasscope) load->setMetadata("alias.scope", aliasscope); @@ -2434,7 +2440,8 @@ static Value *boxed(jl_codectx_t &ctx, const jl_cgval_t &vinfo) static void emit_unionmove(jl_codectx_t &ctx, Value *dest, MDNode *tbaa_dst, const jl_cgval_t &src, Value *skip, bool isVolatile=false) { if (AllocaInst *ai = dyn_cast(dest)) - ctx.builder.CreateStore(UndefValue::get(ai->getAllocatedType()), ai); + // TODO: make this a lifetime_end & dereferencable annotation? + ctx.builder.CreateAlignedStore(UndefValue::get(ai->getAllocatedType()), ai, ai->getAlignment()); if (jl_is_concrete_type(src.typ) || src.constant) { jl_value_t *typ = src.constant ? jl_typeof(src.constant) : src.typ; Type *store_ty = julia_type_to_llvm(typ); @@ -2694,7 +2701,7 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg if (jl_field_isptr(sty, i)) { fval = boxed(ctx, fval_info); if (!init_as_value) - tbaa_decorate(tbaa_stack, ctx.builder.CreateStore(fval, dest)); + tbaa_decorate(tbaa_stack, ctx.builder.CreateAlignedStore(fval, dest, jl_field_align(sty, i))); } else if (jl_is_uniontype(jtype)) { // compute tindex from rhs diff --git a/src/codegen.cpp b/src/codegen.cpp index dad73e3f08030..a5f05b1fcf4ee 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -2590,7 +2590,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, #ifdef _P64 nva = ctx.builder.CreateTrunc(nva, T_int32); #endif - Value *theArgs = ctx.builder.CreateInBoundsGEP(ctx.argArray, ConstantInt::get(T_size, ctx.nReqArgs)); + Value *theArgs = ctx.builder.CreateInBoundsGEP(T_prjlvalue, ctx.argArray, ConstantInt::get(T_size, ctx.nReqArgs)); Value *r = ctx.builder.CreateCall(prepare_call(jlapplygeneric_func), { theF, theArgs, nva }); *ret = mark_julia_type(ctx, r, true, jl_any_type); return true; @@ -2846,13 +2846,13 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, if (load->getPointerOperand() == ctx.slots[ctx.vaSlot].boxroot && ctx.argArray) { Value *valen = emit_n_varargs(ctx); jl_cgval_t va_ary( // fake instantiation of a cgval, in order to call emit_bounds_check - ctx.builder.CreateInBoundsGEP(ctx.argArray, ConstantInt::get(T_size, ctx.nReqArgs)), + ctx.builder.CreateInBoundsGEP(T_prjlvalue, ctx.argArray, ConstantInt::get(T_size, ctx.nReqArgs)), NULL, false, NULL, NULL); Value *idx = emit_unbox(ctx, T_size, fld, (jl_value_t*)jl_long_type); jl_value_t *boundscheck = (nargs == 3 ? argv[3].constant : jl_true); idx = emit_bounds_check(ctx, va_ary, NULL, idx, valen, boundscheck); idx = ctx.builder.CreateAdd(idx, ConstantInt::get(T_size, ctx.nReqArgs)); - Instruction *v = ctx.builder.CreateLoad(ctx.builder.CreateInBoundsGEP(ctx.argArray, idx)); + Instruction *v = ctx.builder.CreateLoad(T_prjlvalue, ctx.builder.CreateInBoundsGEP(ctx.argArray, idx)); // if we know the result type of this load, we will mark that information here too tbaa_decorate(tbaa_value, maybe_mark_load_dereferenceable(v, false, rt)); *ret = mark_julia_type(ctx, v, /*boxed*/ true, rt); @@ -2994,8 +2994,8 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, Value *idx = emit_unbox(ctx, T_size, fld, (jl_value_t*)jl_long_type); jl_value_t *boundscheck = (nargs == 3 ? argv[3].constant : jl_true); emit_bounds_check(ctx, typ, (jl_value_t*)jl_datatype_type, idx, types_len, boundscheck); - Value *fieldtyp_p = ctx.builder.CreateInBoundsGEP(decay_derived(emit_bitcast(ctx, types_svec, T_pprjlvalue)), idx); - Value *fieldtyp = tbaa_decorate(tbaa_const, ctx.builder.CreateLoad(fieldtyp_p)); + Value *fieldtyp_p = ctx.builder.CreateInBoundsGEP(T_prjlvalue, decay_derived(emit_bitcast(ctx, types_svec, T_pprjlvalue)), idx); + Value *fieldtyp = tbaa_decorate(tbaa_const, ctx.builder.CreateLoad(T_prjlvalue, fieldtyp_p)); *ret = mark_julia_type(ctx, fieldtyp, true, (jl_value_t*)jl_type_type); return true; } @@ -3009,7 +3009,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, if (sty == jl_string_type || sty == jl_simplevector_type) { // String and SimpleVector's length fields have the same layout auto ptr = emit_bitcast(ctx, boxed(ctx, obj), T_psize); - Value *len = tbaa_decorate(tbaa_mutab, ctx.builder.CreateLoad(ptr)); + Value *len = tbaa_decorate(tbaa_mutab, ctx.builder.CreateLoad(T_size, ptr)); if (sty == jl_simplevector_type) { len = ctx.builder.CreateMul(len, ConstantInt::get(T_size, sizeof(void*))); len = ctx.builder.CreateAdd(len, ConstantInt::get(T_size, sizeof(void*))); @@ -3424,7 +3424,7 @@ static Value *global_binding_pointer(jl_codectx_t &ctx, jl_module_t *m, jl_sym_t GlobalVariable *bindinggv = new GlobalVariable(*ctx.f->getParent(), T_pjlvalue, false, GlobalVariable::InternalLinkage, initnul, name.str()); - Value *cachedval = ctx.builder.CreateLoad(bindinggv); + Value *cachedval = ctx.builder.CreateLoad(T_pjlvalue, bindinggv); BasicBlock *have_val = BasicBlock::Create(jl_LLVMContext, "found"), *not_found = BasicBlock::Create(jl_LLVMContext, "notfound"); BasicBlock *currentbb = ctx.builder.GetInsertBlock(); @@ -3476,7 +3476,7 @@ static jl_cgval_t emit_sparam(jl_codectx_t &ctx, size_t i) T_prjlvalue, ctx.spvals_ptr, i + sizeof(jl_svec_t) / sizeof(jl_value_t*)); - Value *sp = tbaa_decorate(tbaa_const, ctx.builder.CreateLoad(bp)); + Value *sp = tbaa_decorate(tbaa_const, ctx.builder.CreateLoad(T_prjlvalue, bp)); Value *isnull = ctx.builder.CreateICmpNE(emit_typeof(ctx, sp), maybe_decay_untracked(literal_pointer_val(ctx, (jl_value_t*)jl_tvar_type))); jl_unionall_t *sparam = (jl_unionall_t*)ctx.linfo->def.method->sig; @@ -3499,7 +3499,7 @@ static jl_cgval_t emit_global(jl_codectx_t &ctx, jl_sym_t *sym) // double-check that a global variable is actually defined. this // can be a problem in parallel when a definition is missing on // one machine. - return mark_julia_type(ctx, tbaa_decorate(tbaa_binding, ctx.builder.CreateLoad(bp)), true, jl_any_type); + return mark_julia_type(ctx, tbaa_decorate(tbaa_binding, ctx.builder.CreateLoad(T_prjlvalue, bp)), true, jl_any_type); } return emit_checked_var(ctx, bp, sym, false, tbaa_binding); } @@ -3514,15 +3514,15 @@ static jl_cgval_t emit_isdefined(jl_codectx_t &ctx, jl_value_t *sym) return mark_julia_const(jl_true); if (vi.boxroot == NULL || vi.pTIndex != NULL) { assert(vi.defFlag); - isnull = ctx.builder.CreateLoad(vi.defFlag, vi.isVolatile); + isnull = ctx.builder.CreateLoad(T_int1, vi.defFlag, vi.isVolatile); } if (vi.boxroot != NULL) { - Value *boxed = ctx.builder.CreateLoad(vi.boxroot, vi.isVolatile); + Value *boxed = ctx.builder.CreateLoad(T_prjlvalue, vi.boxroot, vi.isVolatile); Value *box_isnull = ctx.builder.CreateICmpNE(boxed, maybe_decay_untracked(V_null)); if (vi.pTIndex) { // value is either boxed in the stack slot, or unboxed in value // as indicated by testing (pTIndex & 0x80) - Value *tindex = ctx.builder.CreateLoad(vi.pTIndex, vi.isVolatile); + Value *tindex = ctx.builder.CreateLoad(T_int8, vi.pTIndex, vi.isVolatile); Value *load_unbox = ctx.builder.CreateICmpEQ( ctx.builder.CreateAnd(tindex, ConstantInt::get(T_int8, 0x80)), ConstantInt::get(T_int8, 0)); @@ -3547,7 +3547,7 @@ static jl_cgval_t emit_isdefined(jl_codectx_t &ctx, jl_value_t *sym) T_prjlvalue, ctx.spvals_ptr, i + sizeof(jl_svec_t) / sizeof(jl_value_t*)); - Value *sp = tbaa_decorate(tbaa_const, ctx.builder.CreateLoad(bp)); + Value *sp = tbaa_decorate(tbaa_const, ctx.builder.CreateLoad(T_prjlvalue, bp)); isnull = ctx.builder.CreateICmpNE(emit_typeof(ctx, sp), maybe_decay_untracked(literal_pointer_val(ctx, (jl_value_t*)jl_tvar_type))); } @@ -3601,8 +3601,8 @@ static jl_cgval_t emit_varinfo(jl_codectx_t &ctx, jl_varinfo_t &vi, jl_sym_t *va AllocaInst *ssaslot = cast(varslot->clone()); ssaslot->insertAfter(varslot); if (vi.isVolatile) { - Value *unbox = ctx.builder.CreateLoad(vi.value.V, true); - ctx.builder.CreateStore(unbox, ssaslot); + Value *unbox = ctx.builder.CreateAlignedLoad(ssaslot->getAllocatedType(), varslot, varslot->getAlignment(), true); + ctx.builder.CreateAlignedStore(unbox, ssaslot, ssaslot->getAlignment()); } else { const DataLayout &DL = jl_data_layout; @@ -3611,18 +3611,18 @@ static jl_cgval_t emit_varinfo(jl_codectx_t &ctx, jl_varinfo_t &vi, jl_sym_t *va } Value *tindex = NULL; if (vi.pTIndex) - tindex = ctx.builder.CreateLoad(vi.pTIndex, vi.isVolatile); + tindex = ctx.builder.CreateLoad(T_int8, vi.pTIndex, vi.isVolatile); v = mark_julia_slot(ssaslot, vi.value.typ, tindex, tbaa_stack); } if (vi.boxroot == NULL) v = update_julia_type(ctx, v, typ); if (vi.usedUndef) { assert(vi.defFlag); - isnull = ctx.builder.CreateLoad(vi.defFlag, vi.isVolatile); + isnull = ctx.builder.CreateLoad(T_int1, vi.defFlag, vi.isVolatile); } } if (vi.boxroot != NULL) { - Instruction *boxed = ctx.builder.CreateLoad(vi.boxroot, vi.isVolatile); + Instruction *boxed = ctx.builder.CreateLoad(T_prjlvalue, vi.boxroot, vi.isVolatile); Value *box_isnull; if (vi.usedUndef) box_isnull = ctx.builder.CreateICmpNE(boxed, maybe_decay_untracked(V_null)); @@ -3714,7 +3714,7 @@ static void emit_vi_assignment_unboxed(jl_codectx_t &ctx, jl_varinfo_t &vi, Valu if (vi.value.V != rval_info.V) { Value *copy_bytes = ConstantInt::get(T_int32, jl_datatype_size(vi.value.typ)); emit_memcpy(ctx, vi.value.V, tbaa_stack, rval_info, copy_bytes, - jl_datatype_align(rval_info.typ), vi.isVolatile); + julia_alignment(rval_info.typ), vi.isVolatile); } } else { @@ -3803,12 +3803,12 @@ static void emit_phinode_assign(jl_codectx_t &ctx, ssize_t idx, jl_value_t *r) dest = emit_static_alloca(ctx, vtype); Value *phi = emit_static_alloca(ctx, vtype); #if JL_LLVM_VERSION >= 70000 - ctx.builder.CreateMemCpy(phi, jl_datatype_align(phiType), + ctx.builder.CreateMemCpy(phi, julia_alignment(phiType), dest, 0, jl_datatype_size(phiType), false); #else ctx.builder.CreateMemCpy(phi, dest, jl_datatype_size(phiType), - jl_datatype_align(phiType), false); + julia_alignment(phiType), false); #endif ctx.builder.CreateLifetimeEnd(dest); slot = mark_julia_slot(phi, phiType, NULL, tbaa_stack); @@ -4663,7 +4663,7 @@ static Function* gen_cfun_wrapper( if (aref) { if (jargty == (jl_value_t*)jl_any_type) { inputarg = mark_julia_type(ctx, - ctx.builder.CreateLoad(emit_bitcast(ctx, val, T_pprjlvalue)), + ctx.builder.CreateLoad(T_prjlvalue, emit_bitcast(ctx, val, T_pprjlvalue)), true, jl_any_type); } else if (static_at && jl_is_concrete_immutable(jargty)) { // anything that could be stored unboxed @@ -5325,7 +5325,7 @@ static Function *gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlret } else { Value *argPtr = ctx.builder.CreateConstInBoundsGEP1_32(T_prjlvalue, argArray, i - 1); - theArg = maybe_mark_load_dereferenceable(ctx.builder.CreateLoad(argPtr), false, ty); + theArg = maybe_mark_load_dereferenceable(ctx.builder.CreateLoad(T_prjlvalue, argPtr), false, ty); } if (!isboxed) { theArg = decay_derived(emit_bitcast(ctx, theArg, PointerType::get(lty, 0))); @@ -5345,7 +5345,7 @@ static Function *gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlret if (retarg == 0) theArg = funcArg; else - theArg = ctx.builder.CreateLoad(ctx.builder.CreateConstInBoundsGEP1_32(T_prjlvalue, argArray, retarg - 1)); + theArg = ctx.builder.CreateLoad(T_prjlvalue, ctx.builder.CreateConstInBoundsGEP1_32(T_prjlvalue, argArray, retarg - 1)); retval = mark_julia_type(ctx, theArg, true, jl_any_type); } else { @@ -6120,8 +6120,8 @@ static std::unique_ptr emit_function( theArg = mark_julia_type(ctx, fArg, true, vi.value.typ); } else { - Value *argPtr = ctx.builder.CreateInBoundsGEP(argArray, ConstantInt::get(T_size, i-1)); - auto load = maybe_mark_load_dereferenceable(ctx.builder.CreateLoad(argPtr), + Value *argPtr = ctx.builder.CreateInBoundsGEP(T_prjlvalue, argArray, ConstantInt::get(T_size, i-1)); + auto load = maybe_mark_load_dereferenceable(ctx.builder.CreateLoad(T_prjlvalue, argPtr), false, vi.value.typ); theArg = mark_julia_type(ctx, load, true, vi.value.typ); if (ctx.debug_enabled && vi.dinfo && !vi.boxroot && !vi.value.V) { @@ -6198,7 +6198,7 @@ static std::unique_ptr emit_function( restTuple = ctx.builder.CreateCall(prepare_call(jltuple_func), { maybe_decay_untracked(V_null), - ctx.builder.CreateInBoundsGEP(argArray, + ctx.builder.CreateInBoundsGEP(T_prjlvalue, argArray, ConstantInt::get(T_size, nreq - 1)), ctx.builder.CreateSub(argCount, ConstantInt::get(T_int32, nreq - 1)) }); diff --git a/src/datatype.c b/src/datatype.c index 3f511ae8d9723..779d0a30c98af 100644 --- a/src/datatype.c +++ b/src/datatype.c @@ -105,6 +105,8 @@ static jl_datatype_layout_t *jl_get_layout(uint32_t nfields, jl_fielddesc32_t desc[], uint32_t pointers[]) JL_NOTSAFEPOINT { + assert(alignment); // should have been verified by caller + // compute the smallest fielddesc type that can hold the layout description int fielddesc_type = 0; if (nfields > 0) { @@ -191,7 +193,7 @@ unsigned jl_special_vector_alignment(size_t nfields, jl_value_t *t) { if (!jl_is_vecelement_type(t)) return 0; - assert(jl_datatype_nfields(t)==1); + assert(jl_datatype_nfields(t) == 1); jl_value_t *ty = jl_field_type((jl_datatype_t*)t, 0); if (!jl_is_primitivetype(ty)) // LLVM requires that a vector element be a primitive type. @@ -199,16 +201,12 @@ unsigned jl_special_vector_alignment(size_t nfields, jl_value_t *t) // motivating use case comes up for Julia, we reject pointers. return 0; size_t elsz = jl_datatype_size(ty); - if (elsz>8 || (1<alignment; alignment*=2 ) - continue; - return alignment; + size_t size = nfields * elsz; + // Use natural alignment for this vector: this matches LLVM and clang. + return next_power_of_two(size); } STATIC_INLINE int jl_is_datatype_make_singleton(jl_datatype_t *d) @@ -452,7 +450,6 @@ void jl_compute_field_offsets(jl_datatype_t *st) haspadding = 1; } } - assert(al <= JL_HEAP_ALIGNMENT && (JL_HEAP_ALIGNMENT % al) == 0); if (al != 0) { size_t alsz = LLT_ALIGN(sz, al); if (sz & (al - 1)) @@ -472,10 +469,7 @@ void jl_compute_field_offsets(jl_datatype_t *st) // Some tuples become LLVM vectors with stronger alignment than what was calculated above. unsigned al = jl_special_vector_alignment(nfields, firstty); assert(al % alignm == 0); - // JL_HEAP_ALIGNMENT is the biggest alignment we can guarantee on the heap. - if (al > JL_HEAP_ALIGNMENT) - alignm = JL_HEAP_ALIGNMENT; - else if (al) + if (al > alignm) alignm = al; } st->size = LLT_ALIGN(sz, alignm); diff --git a/src/intrinsics.cpp b/src/intrinsics.cpp index 90a2ff695ae77..c7175b014ebbf 100644 --- a/src/intrinsics.cpp +++ b/src/intrinsics.cpp @@ -341,7 +341,7 @@ static Value *emit_unbox(jl_codectx_t &ctx, Type *to, const jl_cgval_t &x, jl_va Type *dest_ty = unboxed->getType()->getPointerTo(); if (dest->getType() != dest_ty) dest = emit_bitcast(ctx, dest, dest_ty); - tbaa_decorate(tbaa_dest, ctx.builder.CreateStore(unboxed, dest)); + tbaa_decorate(tbaa_dest, ctx.builder.CreateAlignedStore(unboxed, dest, julia_alignment(jt))); return NULL; } diff --git a/src/julia.h b/src/julia.h index 647ee82be932f..e4e6403945b95 100644 --- a/src/julia.h +++ b/src/julia.h @@ -428,9 +428,9 @@ typedef struct { uint32_t nfields; uint32_t npointers; // number of pointers embedded inside int32_t first_ptr; // index of the first pointer (or -1) - uint32_t alignment : 9; // strictest alignment over all fields - uint32_t haspadding : 1; // has internal undefined bytes - uint32_t fielddesc_type : 2; // 0 -> 8, 1 -> 16, 2 -> 32 + uint16_t alignment; // strictest alignment over all fields + uint16_t haspadding : 1; // has internal undefined bytes + uint16_t fielddesc_type : 2; // 0 -> 8, 1 -> 16, 2 -> 32 // union { // jl_fielddesc8_t field8[nfields]; // jl_fielddesc16_t field16[nfields]; diff --git a/src/llvm-alloc-opt.cpp b/src/llvm-alloc-opt.cpp index fd45665791389..46cbb3c64f41f 100644 --- a/src/llvm-alloc-opt.cpp +++ b/src/llvm-alloc-opt.cpp @@ -938,8 +938,8 @@ void Optimizer::moveToStack(CallInst *orig_inst, size_t sz, bool has_ref) // SSA from it are live when we run the allocation again. // It is now safe to promote the allocation to an entry block alloca. size_t align = 1; - // TODO make codegen handling of alignment consistent and pass that as a parameter - // to the allocation function directly. + // TODO: This is overly conservative. May want to instead pass this as a + // parameter to the allocation function directly. if (sz > 1) align = MinAlign(JL_SMALL_BYTE_ALIGNMENT, NextPowerOf2(sz)); // No debug info for prolog instructions diff --git a/test/vecelement.jl b/test/vecelement.jl index e6caa10cd814d..5652ea10d3aa6 100644 --- a/test/vecelement.jl +++ b/test/vecelement.jl @@ -3,7 +3,7 @@ make_value(::Type{T}, i::Integer) where {T<:Integer} = 3*i%T make_value(::Type{T},i::Integer) where {T<:AbstractFloat} = T(3*i) -Vec{N,T} = NTuple{N,Base.VecElement{T}} +const Vec{N,T} = NTuple{N,Base.VecElement{T}} # Crash report for #15244 motivated this test. @generated function thrice_iota(::Type{Vec{N,T}}) where {N,T} @@ -73,6 +73,13 @@ end @test isa(VecElement((1,2)), VecElement{Tuple{Int,Int}}) +# test for alignment agreement (#32414) +@noinline function bar32414(a) + v = ntuple(w -> VecElement(Float64(10w)), Val(8)) + return a, (v, (a, (1e6, 1e9))) +end +@test bar32414(-35.0) === (-35.0, ((VecElement(10.0), VecElement(20.0), VecElement(30.0), VecElement(40.0), VecElement(50.0), VecElement(60.0), VecElement(70.0), VecElement(80.0)), (-35.0, (1.0e6, 1.0e9)))) + # The following test mimic SIMD.jl const _llvmtypes = Dict{DataType, String}( Float64 => "double", @@ -105,8 +112,8 @@ end # Test various SIMD Vectors with known good sizes for T in (Float64, Float32, Int64, Int32) for N in 1:36 - a = ntuple(i->VecElement(T(i)), N) - result = ntuple(i-> VecElement(T(i+i)), N) + a = ntuple(i -> VecElement(T(i)), N) + result = ntuple(i -> VecElement(T(i+i)), N) b = vecadd(a, a) @test b == result b = f20961([a], [a])