-use rustc_abi::{Align, Endian, HasDataLayout, Size};
+use rustc_abi::{Align, BackendRepr, Endian, HasDataLayout, Primitive, Size, TyAndLayout};
+use rustc_codegen_ssa::MemFlags;
 use rustc_codegen_ssa::common::IntPredicate;
 use rustc_codegen_ssa::mir::operand::OperandRef;
-use rustc_codegen_ssa::traits::{BaseTypeCodegenMethods, BuilderMethods, ConstCodegenMethods};
+use rustc_codegen_ssa::traits::{
+    BaseTypeCodegenMethods, BuilderMethods, ConstCodegenMethods, LayoutTypeCodegenMethods,
+};
 use rustc_middle::ty::Ty;
 use rustc_middle::ty::layout::{HasTyCtxt, LayoutOf};
 
@@ -303,6 +306,313 @@ fn emit_s390x_va_arg<'ll, 'tcx>(
     bx.load(val_type, val_addr, layout.align.abi)
 }
 
+fn emit_x86_64_sysv64_va_arg<'ll, 'tcx>(
+    bx: &mut Builder<'_, 'll, 'tcx>,
+    list: OperandRef<'tcx, &'ll Value>,
+    target_ty: Ty<'tcx>,
+) -> &'ll Value {
+    let dl = bx.cx.data_layout();
+
+    // Implementation of the System V x86_64 ABI calling convention for va_args, see
+    // https://gitlab.com/x86-psABIs/x86-64-ABI (section 3.5.7). This implementation is heavily
+    // based on the one in clang.
+
+    // We're able to take some shortcuts because the return type of `va_arg` must implement the
+    // `VaArgSafe` trait. Currently, only pointers, f64, i32, u32, i64 and u64 implement this trait.
+
+    // typedef struct __va_list_tag {
+    //     unsigned int gp_offset;
+    //     unsigned int fp_offset;
+    //     void *overflow_arg_area;
+    //     void *reg_save_area;
+    // } va_list[1];
+    let va_list_addr = list.immediate();
+
+    // Peel off any newtype wrappers.
+    //
+    // The "C" ABI does not unwrap newtypes (see `ReprOptions::inhibit_newtype_abi_optimization`).
+    // Here, we do actually want the unwrapped representation, because that is how LLVM/Clang
+    // pass such types to variadic functions.
+    //
+    // An example of a type that must be unwrapped is `Foo` below. Without the unwrapping, it has
+    // `BackendRepr::Memory`, but we need it to be `BackendRepr::Scalar` to generate correct code.
+    //
+    // ```
+    // #[repr(C)]
+    // struct Empty;
+    //
+    // #[repr(C)]
+    // struct Foo([Empty; 8], i32);
+    // ```
+    let layout = {
+        let mut layout = bx.cx.layout_of(target_ty);
+
+        while let Some((_, inner)) = layout.non_1zst_field(bx.cx) {
+            layout = inner;
+        }
+
+        layout
+    };
+
+    // AMD64-ABI 3.5.7p5: Step 1. Determine whether type may be passed
+    // in the registers. If not go to step 7.
+
+    // AMD64-ABI 3.5.7p5: Step 2. Compute num_gp to hold the number of
+    // general purpose registers needed to pass type and num_fp to hold
+    // the number of floating point registers needed.
+
+    let mut num_gp_registers = 0;
+    let mut num_fp_registers = 0;
+
+    let mut registers_for_primitive = |p| match p {
+        Primitive::Int(integer, _is_signed) => {
+            num_gp_registers += integer.size().bytes().div_ceil(8) as u32;
+        }
+        Primitive::Float(float) => {
+            num_fp_registers += float.size().bytes().div_ceil(16) as u32;
+        }
+        Primitive::Pointer(_) => {
+            num_gp_registers += 1;
+        }
+    };
+
+    match layout.layout.backend_repr() {
+        BackendRepr::Scalar(scalar) => {
+            registers_for_primitive(scalar.primitive());
+        }
+        BackendRepr::ScalarPair(scalar1, scalar2) => {
+            registers_for_primitive(scalar1.primitive());
+            registers_for_primitive(scalar2.primitive());
+        }
+        BackendRepr::SimdVector { .. } => {
+            // Because no instance of VaArgSafe uses a non-scalar `BackendRepr`.
+            unreachable!(
+                "No x86-64 SysV va_arg implementation for {:?}",
+                layout.layout.backend_repr()
+            )
+        }
+        BackendRepr::Memory { .. } => {
+            let mem_addr = x86_64_sysv64_va_arg_from_memory(bx, va_list_addr, layout);
+            return bx.load(layout.llvm_type(bx), mem_addr, layout.align.abi);
+        }
+    };
+
+    // AMD64-ABI 3.5.7p5: Step 3. Verify whether arguments fit into
+    // registers. In the case: l->gp_offset > 48 - num_gp * 8 or
+    // l->fp_offset > 176 - num_fp * 16 go to step 7.
+
+    let unsigned_int_offset = 4;
+    let ptr_offset = 8;
+    let gp_offset_ptr = va_list_addr;
+    let fp_offset_ptr = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(unsigned_int_offset));
+
+    let gp_offset_v = bx.load(bx.type_i32(), gp_offset_ptr, Align::from_bytes(8).unwrap());
+    let fp_offset_v = bx.load(bx.type_i32(), fp_offset_ptr, Align::from_bytes(4).unwrap());
+
+    let mut use_regs = bx.const_bool(false);
+
+    if num_gp_registers > 0 {
+        let max_offset_val = 48u32 - num_gp_registers * 8;
+        let fits_in_gp = bx.icmp(IntPredicate::IntULE, gp_offset_v, bx.const_u32(max_offset_val));
+        use_regs = fits_in_gp;
+    }
+
+    if num_fp_registers > 0 {
+        let max_offset_val = 176u32 - num_fp_registers * 16;
+        let fits_in_fp = bx.icmp(IntPredicate::IntULE, fp_offset_v, bx.const_u32(max_offset_val));
+        use_regs = if num_gp_registers > 0 { bx.and(use_regs, fits_in_fp) } else { fits_in_fp };
+    }
+
+    let in_reg = bx.append_sibling_block("va_arg.in_reg");
+    let in_mem = bx.append_sibling_block("va_arg.in_mem");
+    let end = bx.append_sibling_block("va_arg.end");
+
+    bx.cond_br(use_regs, in_reg, in_mem);
+
+    // Emit code to load the value if it was passed in a register.
+    bx.switch_to_block(in_reg);
+
+    // AMD64-ABI 3.5.7p5: Step 4. Fetch type from l->reg_save_area with
+    // an offset of l->gp_offset and/or l->fp_offset. This may require
+    // copying to a temporary location in case the parameter is passed
+    // in different register classes or requires an alignment greater
+    // than 8 for general purpose registers and 16 for XMM registers.
+    //
+    // FIXME(llvm): This really results in shameful code when we end up needing to
+    // collect arguments from different places; often what should result in a
+    // simple assembling of a structure from scattered addresses has many more
+    // loads than necessary. Can we clean this up?
+    let reg_save_area_ptr =
+        bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(2 * unsigned_int_offset + ptr_offset));
+    let reg_save_area_v = bx.load(bx.type_ptr(), reg_save_area_ptr, dl.pointer_align.abi);
+
+    let reg_addr = match layout.layout.backend_repr() {
+        BackendRepr::Scalar(scalar) => match scalar.primitive() {
+            Primitive::Int(_, _) | Primitive::Pointer(_) => {
+                let reg_addr = bx.inbounds_ptradd(reg_save_area_v, gp_offset_v);
+
+                // Copy into a temporary if the type is more aligned than the register save area.
+                let gp_align = Align::from_bytes(8).unwrap();
+                copy_to_temporary_if_more_aligned(bx, reg_addr, layout, gp_align)
+            }
+            Primitive::Float(_) => bx.inbounds_ptradd(reg_save_area_v, fp_offset_v),
+        },
+        BackendRepr::ScalarPair(scalar1, scalar2) => {
+            let ty_lo = bx.cx().scalar_pair_element_backend_type(layout, 0, false);
+            let ty_hi = bx.cx().scalar_pair_element_backend_type(layout, 1, false);
+
+            let align_lo = layout.field(bx.cx, 0).layout.align().abi;
+            let align_hi = layout.field(bx.cx, 1).layout.align().abi;
+
+            match (scalar1.primitive(), scalar2.primitive()) {
+                (Primitive::Float(_), Primitive::Float(_)) => {
+                    // SSE registers are spaced 16 bytes apart in the register save
+                    // area, we need to collect the two eightbytes together.
+                    // The ABI isn't explicit about this, but it seems reasonable
+                    // to assume that the slots are 16-byte aligned, since the stack is
+                    // naturally 16-byte aligned and the prologue is expected to store
+                    // all the SSE registers to the RSA.
+                    let reg_lo_addr = bx.inbounds_ptradd(reg_save_area_v, fp_offset_v);
+                    let reg_hi_addr = bx.inbounds_ptradd(reg_lo_addr, bx.const_i32(16));
+
+                    let align = layout.layout.align().abi;
+                    let tmp = bx.alloca(layout.layout.size(), align);
+
+                    let reg_lo = bx.load(ty_lo, reg_lo_addr, align_lo);
+                    let reg_hi = bx.load(ty_hi, reg_hi_addr, align_hi);
+
+                    let offset = scalar1.size(bx.cx).align_to(align_hi).bytes();
+                    let field0 = tmp;
+                    let field1 = bx.inbounds_ptradd(tmp, bx.const_u32(offset as u32));
+
+                    bx.store(reg_lo, field0, align);
+                    bx.store(reg_hi, field1, align);
+
+                    tmp
+                }
+                (Primitive::Float(_), _) | (_, Primitive::Float(_)) => {
+                    let gp_addr = bx.inbounds_ptradd(reg_save_area_v, gp_offset_v);
+                    let fp_addr = bx.inbounds_ptradd(reg_save_area_v, fp_offset_v);
+
+                    let (reg_lo_addr, reg_hi_addr) = match scalar1.primitive() {
+                        Primitive::Float(_) => (fp_addr, gp_addr),
+                        Primitive::Int(_, _) | Primitive::Pointer(_) => (gp_addr, fp_addr),
+                    };
+
+                    let tmp = bx.alloca(layout.layout.size(), layout.layout.align().abi);
+
+                    let reg_lo = bx.load(ty_lo, reg_lo_addr, align_lo);
+                    let reg_hi = bx.load(ty_hi, reg_hi_addr, align_hi);
+
+                    let offset = scalar1.size(bx.cx).align_to(align_hi).bytes();
+                    let field0 = tmp;
+                    let field1 = bx.inbounds_ptradd(tmp, bx.const_u32(offset as u32));
+
+                    bx.store(reg_lo, field0, align_lo);
+                    bx.store(reg_hi, field1, align_hi);
+
+                    tmp
+                }
+                (_, _) => {
+                    // Two integer/pointer values are just contiguous in memory.
+                    let reg_addr = bx.inbounds_ptradd(reg_save_area_v, gp_offset_v);
+
+                    // Copy into a temporary if the type is more aligned than the register save area.
+                    let gp_align = Align::from_bytes(8).unwrap();
+                    copy_to_temporary_if_more_aligned(bx, reg_addr, layout, gp_align)
+                }
+            }
+        }
+        // The previous match on `BackendRepr` means control flow has already escaped.
+        BackendRepr::SimdVector { .. } | BackendRepr::Memory { .. } => unreachable!(),
+    };
+
+    // AMD64-ABI 3.5.7p5: Step 5. Set:
+    // l->gp_offset = l->gp_offset + num_gp * 8
+    if num_gp_registers > 0 {
+        let offset = bx.const_u32(num_gp_registers * 8);
+        let sum = bx.add(gp_offset_v, offset);
+        // An alignment of 8 because `__va_list_tag` is 8-aligned and this is its first field.
+        bx.store(sum, gp_offset_ptr, Align::from_bytes(8).unwrap());
+    }
+
+    // l->fp_offset = l->fp_offset + num_fp * 16.
+    if num_fp_registers > 0 {
+        let offset = bx.const_u32(num_fp_registers * 16);
+        let sum = bx.add(fp_offset_v, offset);
+        bx.store(sum, fp_offset_ptr, Align::from_bytes(4).unwrap());
+    }
+
+    bx.br(end);
+
+    bx.switch_to_block(in_mem);
+    let mem_addr = x86_64_sysv64_va_arg_from_memory(bx, va_list_addr, layout);
+    bx.br(end);
+
+    bx.switch_to_block(end);
+
+    let val_type = layout.llvm_type(bx);
+    let val_addr = bx.phi(bx.type_ptr(), &[reg_addr, mem_addr], &[in_reg, in_mem]);
+
+    bx.load(val_type, val_addr, layout.align.abi)
+}
+
+/// Copy into a temporary if the type is more aligned than the register save area.
+fn copy_to_temporary_if_more_aligned<'ll, 'tcx>(
+    bx: &mut Builder<'_, 'll, 'tcx>,
+    reg_addr: &'ll Value,
+    layout: TyAndLayout<'tcx, Ty<'tcx>>,
+    src_align: Align,
+) -> &'ll Value {
+    if layout.layout.align.abi > src_align {
+        let tmp = bx.alloca(layout.layout.size(), layout.layout.align().abi);
+        bx.memcpy(
+            tmp,
+            layout.layout.align.abi,
+            reg_addr,
+            src_align,
+            bx.const_u32(layout.layout.size().bytes() as u32),
+            MemFlags::empty(),
+        );
+        tmp
+    } else {
+        reg_addr
+    }
+}
+
+fn x86_64_sysv64_va_arg_from_memory<'ll, 'tcx>(
+    bx: &mut Builder<'_, 'll, 'tcx>,
+    va_list_addr: &'ll Value,
+    layout: TyAndLayout<'tcx, Ty<'tcx>>,
+) -> &'ll Value {
+    let dl = bx.cx.data_layout();
+
+    let overflow_arg_area_ptr = bx.inbounds_ptradd(va_list_addr, bx.const_usize(8));
+
+    let overflow_arg_area_v = bx.load(bx.type_ptr(), overflow_arg_area_ptr, dl.pointer_align.abi);
+    // AMD64-ABI 3.5.7p5: Step 7. Align l->overflow_arg_area upwards to a 16
+    // byte boundary if alignment needed by type exceeds 8 byte boundary.
+    // It isn't stated explicitly in the standard, but in practice we use
+    // alignment greater than 16 where necessary.
+    if layout.layout.align.abi.bytes() > 8 {
+        unreachable!("all instances of VaArgSafe have an alignment <= 8");
+    }
+
+    // AMD64-ABI 3.5.7p5: Step 8. Fetch type from l->overflow_arg_area.
+    let mem_addr = overflow_arg_area_v;
+
+    // AMD64-ABI 3.5.7p5: Step 9. Set l->overflow_arg_area to:
+    // l->overflow_arg_area + sizeof(type).
+    // AMD64-ABI 3.5.7p5: Step 10. Align l->overflow_arg_area upwards to
+    // an 8 byte boundary.
+    let size_in_bytes = layout.layout.size().bytes();
+    let offset = bx.const_i32(size_in_bytes.next_multiple_of(8) as i32);
+    let overflow_arg_area = bx.inbounds_ptradd(overflow_arg_area_v, offset);
+    bx.store(overflow_arg_area, overflow_arg_area_ptr, dl.pointer_align.abi);
+
+    mem_addr
+}
+
 fn emit_xtensa_va_arg<'ll, 'tcx>(
     bx: &mut Builder<'_, 'll, 'tcx>,
     list: OperandRef<'tcx, &'ll Value>,
@@ -334,8 +644,7 @@ fn emit_xtensa_va_arg<'ll, 'tcx>(
     // (*va).va_ndx
     let va_reg_offset = 4;
     let va_ndx_offset = va_reg_offset + 4;
-    let offset_ptr =
-        bx.inbounds_gep(bx.type_i8(), va_list_addr, &[bx.cx.const_usize(va_ndx_offset)]);
+    let offset_ptr = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(va_ndx_offset));
 
     let offset = bx.load(bx.type_i32(), offset_ptr, bx.tcx().data_layout.i32_align.abi);
     let offset = round_up_to_alignment(bx, offset, layout.align.abi);
@@ -356,11 +665,10 @@ fn emit_xtensa_va_arg<'ll, 'tcx>(
     bx.store(offset_next, offset_ptr, bx.tcx().data_layout.pointer_align.abi);
 
     // (*va).va_reg
-    let regsave_area_ptr =
-        bx.inbounds_gep(bx.type_i8(), va_list_addr, &[bx.cx.const_usize(va_reg_offset)]);
+    let regsave_area_ptr = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(va_reg_offset));
     let regsave_area =
         bx.load(bx.type_ptr(), regsave_area_ptr, bx.tcx().data_layout.pointer_align.abi);
-    let regsave_value_ptr = bx.inbounds_gep(bx.type_i8(), regsave_area, &[offset]);
+    let regsave_value_ptr = bx.inbounds_ptradd(regsave_area, offset);
     bx.br(end);
 
     bx.switch_to_block(from_stack);
@@ -381,9 +689,9 @@ fn emit_xtensa_va_arg<'ll, 'tcx>(
     bx.store(offset_next_corrected, offset_ptr, bx.tcx().data_layout.pointer_align.abi);
 
     // let stack_value_ptr = unsafe { (*va).va_stk.byte_add(offset_corrected) };
-    let stack_area_ptr = bx.inbounds_gep(bx.type_i8(), va_list_addr, &[bx.cx.const_usize(0)]);
+    let stack_area_ptr = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(0));
     let stack_area = bx.load(bx.type_ptr(), stack_area_ptr, bx.tcx().data_layout.pointer_align.abi);
-    let stack_value_ptr = bx.inbounds_gep(bx.type_i8(), stack_area, &[offset_corrected]);
+    let stack_value_ptr = bx.inbounds_ptradd(stack_area, offset_corrected);
     bx.br(end);
 
     bx.switch_to_block(end);
@@ -449,6 +757,8 @@ pub(super) fn emit_va_arg<'ll, 'tcx>(
                 AllowHigherAlign::No,
             )
         }
+        // This includes `target.is_like_darwin`, which on x86_64 targets is like sysv64.
+        "x86_64" => emit_x86_64_sysv64_va_arg(bx, addr, target_ty),
         "xtensa" => emit_xtensa_va_arg(bx, addr, target_ty),
         // For all other architecture/OS combinations fall back to using
         // the LLVM va_arg instruction.
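
Note (not part of the diff): the constant offsets in `emit_x86_64_sysv64_va_arg` (`unsigned_int_offset = 4`, `ptr_offset = 8`, and `2 * unsigned_int_offset + ptr_offset = 16` for `reg_save_area`) come directly from the field layout of the psABI `__va_list_tag` quoted in the comment above. A minimal sketch that checks those offsets on a 64-bit target; the `VaListTag` type here is illustrative only and does not exist in the compiler sources:

```rust
use std::mem::offset_of;

/// Illustrative mirror of the psABI `__va_list_tag` (not part of the compiler sources).
#[repr(C)]
struct VaListTag {
    gp_offset: u32,             // byte 0: offset of the next unused GP slot in reg_save_area (0..=48)
    fp_offset: u32,             // byte 4: offset of the next unused SSE slot in reg_save_area (48..=176)
    overflow_arg_area: *mut u8, // byte 8: stack area for arguments that did not fit in registers
    reg_save_area: *mut u8,     // byte 16: registers spilled by the prologue (48 GP bytes + 8 * 16 SSE bytes)
}

fn main() {
    assert_eq!(offset_of!(VaListTag, gp_offset), 0);
    assert_eq!(offset_of!(VaListTag, fp_offset), 4);
    assert_eq!(offset_of!(VaListTag, overflow_arg_area), 8);
    assert_eq!(offset_of!(VaListTag, reg_save_area), 16);
}
```

The same bounds explain the `48 - num_gp * 8` and `176 - num_fp * 16` checks: the register save area holds 6 general-purpose eightbytes followed by 8 SSE slots of 16 bytes each.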
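As a rough sketch of the control flow that the emitted LLVM IR encodes, here is the same algorithm written in plain Rust for the simplest case: a single 8-byte integer argument (one GP eightbyte, no SSE registers). The `Tag` type and `next_i64` helper are hypothetical and exist only to illustrate the steps referenced in the comments above:

```rust
/// Hypothetical mirror of the parts of `__va_list_tag` used below (illustration only).
struct Tag {
    gp_offset: u32,
    overflow_arg_area: *const u8,
    reg_save_area: *const u8,
}

/// Sketch of the va_arg control flow for one 8-byte integer argument.
unsafe fn next_i64(tag: &mut Tag) -> i64 {
    // Step 3: does one more GP eightbyte still fit in the 48-byte GP part of the save area?
    let addr = if tag.gp_offset <= 48 - 8 {
        // Steps 4-5: read from reg_save_area + gp_offset, then bump gp_offset by 8.
        let p = unsafe { tag.reg_save_area.add(tag.gp_offset as usize) };
        tag.gp_offset += 8;
        p
    } else {
        // Steps 7-10: read from the overflow (stack) area, then advance it by the
        // argument size rounded up to an 8-byte boundary.
        let p = tag.overflow_arg_area;
        tag.overflow_arg_area = unsafe { p.add(8) };
        p
    };
    unsafe { addr.cast::<i64>().read_unaligned() }
}
```

The real implementation does this branch-free bookkeeping in IR (the `va_arg.in_reg` / `va_arg.in_mem` blocks joined by a `phi`), and handles `ScalarPair` values that may straddle the GP and SSE portions of the register save area.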