|  | 
| 1 |  | -use rustc_abi::{Align, Endian, HasDataLayout, Size}; | 
|  | 1 | +use rustc_abi::{Align, BackendRepr, Endian, HasDataLayout, Primitive, Size, TyAndLayout}; | 
|  | 2 | +use rustc_codegen_ssa::MemFlags; | 
| 2 | 3 | use rustc_codegen_ssa::common::IntPredicate; | 
| 3 | 4 | use rustc_codegen_ssa::mir::operand::OperandRef; | 
| 4 |  | -use rustc_codegen_ssa::traits::{BaseTypeCodegenMethods, BuilderMethods, ConstCodegenMethods}; | 
|  | 5 | +use rustc_codegen_ssa::traits::{ | 
|  | 6 | +    BaseTypeCodegenMethods, BuilderMethods, ConstCodegenMethods, LayoutTypeCodegenMethods, | 
|  | 7 | +}; | 
| 5 | 8 | use rustc_middle::ty::Ty; | 
| 6 | 9 | use rustc_middle::ty::layout::{HasTyCtxt, LayoutOf}; | 
| 7 | 10 | 
 | 
| @@ -303,6 +306,313 @@ fn emit_s390x_va_arg<'ll, 'tcx>( | 
| 303 | 306 |     bx.load(val_type, val_addr, layout.align.abi) | 
| 304 | 307 | } | 
| 305 | 308 | 
 | 
|  | 309 | +fn emit_x86_64_sysv64_va_arg<'ll, 'tcx>( | 
|  | 310 | +    bx: &mut Builder<'_, 'll, 'tcx>, | 
|  | 311 | +    list: OperandRef<'tcx, &'ll Value>, | 
|  | 312 | +    target_ty: Ty<'tcx>, | 
|  | 313 | +) -> &'ll Value { | 
|  | 314 | +    let dl = bx.cx.data_layout(); | 
|  | 315 | + | 
|  | 316 | +    // Implementation of the System V x86_64 ABI calling convention for va_arg, see | 
|  | 317 | +    // https://gitlab.com/x86-psABIs/x86-64-ABI (section 3.5.7). This implementation is heavily | 
|  | 318 | +    // based on the one in clang. | 
|  | 319 | + | 
|  | 320 | +    // We're able to take some shortcuts because the return type of `va_arg` must implement the | 
|  | 321 | +    // `VaArgSafe` trait. Currently, only pointers, f64, i32, u32, i64 and u64 implement this trait. | 
|  | 322 | + | 
|  | 323 | +    // typedef struct __va_list_tag { | 
|  | 324 | +    //     unsigned int gp_offset; | 
|  | 325 | +    //     unsigned int fp_offset; | 
|  | 326 | +    //     void *overflow_arg_area; | 
|  | 327 | +    //     void *reg_save_area; | 
|  | 328 | +    // } va_list[1]; | 
|  | 329 | +    let va_list_addr = list.immediate(); | 
|  | 330 | + | 
|  | 331 | +    // Peel off any newtype wrappers. | 
|  | 332 | +    // | 
|  | 333 | +    // The "C" ABI does not unwrap newtypes (see `ReprOptions::inhibit_newtype_abi_optimization`). | 
|  | 334 | +    // Here, we do actually want the unwrapped representation, because that is how LLVM/Clang | 
|  | 335 | +    // pass such types to variadic functions. | 
|  | 336 | +    // | 
|  | 337 | +    // An example of a type that must be unwrapped is `Foo` below. Without the unwrapping, it has | 
|  | 338 | +    // `BackendRepr::Memory`, but we need it to be `BackendRepr::Scalar` to generate correct code. | 
|  | 339 | +    // | 
|  | 340 | +    // ``` | 
|  | 341 | +    // #[repr(C)] | 
|  | 342 | +    // struct Empty; | 
|  | 343 | +    // | 
|  | 344 | +    // #[repr(C)] | 
|  | 345 | +    // struct Foo([Empty; 8], i32); | 
|  | 346 | +    // ``` | 
|  | 347 | +    let layout = { | 
|  | 348 | +        let mut layout = bx.cx.layout_of(target_ty); | 
|  | 349 | + | 
|  | 350 | +        while let Some((_, inner)) = layout.non_1zst_field(bx.cx) { | 
|  | 351 | +            layout = inner; | 
|  | 352 | +        } | 
|  | 353 | + | 
|  | 354 | +        layout | 
|  | 355 | +    }; | 
|  | 356 | + | 
|  | 357 | +    // AMD64-ABI 3.5.7p5: Step 1. Determine whether type may be passed | 
|  | 358 | +    // in the registers. If not go to step 7. | 
|  | 359 | + | 
|  | 360 | +    // AMD64-ABI 3.5.7p5: Step 2. Compute num_gp to hold the number of | 
|  | 361 | +    // general purpose registers needed to pass type and num_fp to hold | 
|  | 362 | +    // the number of floating point registers needed. | 
|  | 363 | + | 
|  | 364 | +    let mut num_gp_registers = 0; | 
|  | 365 | +    let mut num_fp_registers = 0; | 
|  | 366 | + | 
|  | 367 | +    let mut registers_for_primitive = |p| match p { | 
|  | 368 | +        Primitive::Int(integer, _is_signed) => { | 
|  | 369 | +            num_gp_registers += integer.size().bytes().div_ceil(8) as u32; | 
|  | 370 | +        } | 
|  | 371 | +        Primitive::Float(float) => { | 
|  | 372 | +            num_fp_registers += float.size().bytes().div_ceil(16) as u32; | 
|  | 373 | +        } | 
|  | 374 | +        Primitive::Pointer(_) => { | 
|  | 375 | +            num_gp_registers += 1; | 
|  | 376 | +        } | 
|  | 377 | +    }; | 
|  | 378 | + | 
|  | 379 | +    match layout.layout.backend_repr() { | 
|  | 380 | +        BackendRepr::Scalar(scalar) => { | 
|  | 381 | +            registers_for_primitive(scalar.primitive()); | 
|  | 382 | +        } | 
|  | 383 | +        BackendRepr::ScalarPair(scalar1, scalar2) => { | 
|  | 384 | +            registers_for_primitive(scalar1.primitive()); | 
|  | 385 | +            registers_for_primitive(scalar2.primitive()); | 
|  | 386 | +        } | 
|  | 387 | +        BackendRepr::SimdVector { .. } => { | 
|  | 388 | +        // Unreachable because no instance of VaArgSafe uses a non-scalar `BackendRepr`. | 
|  | 389 | +            unreachable!( | 
|  | 390 | +                "No x86-64 SysV va_arg implementation for {:?}", | 
|  | 391 | +                layout.layout.backend_repr() | 
|  | 392 | +            ) | 
|  | 393 | +        } | 
|  | 394 | +        BackendRepr::Memory { .. } => { | 
|  | 395 | +            let mem_addr = x86_64_sysv64_va_arg_from_memory(bx, va_list_addr, layout); | 
|  | 396 | +            return bx.load(layout.llvm_type(bx), mem_addr, layout.align.abi); | 
|  | 397 | +        } | 
|  | 398 | +    }; | 
|  | 399 | + | 
|  | 400 | +    // AMD64-ABI 3.5.7p5: Step 3. Verify whether arguments fit into | 
|  | 401 | +    // registers. In the case: l->gp_offset > 48 - num_gp * 8 or | 
|  | 402 | +    // l->fp_offset > 176 - num_fp * 16 go to step 7. | 
|  | 403 | + | 
|  | 404 | +    let unsigned_int_offset = 4; | 
|  | 405 | +    let ptr_offset = 8; | 
|  | 406 | +    let gp_offset_ptr = va_list_addr; | 
|  | 407 | +    let fp_offset_ptr = bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(unsigned_int_offset)); | 
|  | 408 | + | 
|  | 409 | +    let gp_offset_v = bx.load(bx.type_i32(), gp_offset_ptr, Align::from_bytes(8).unwrap()); | 
|  | 410 | +    let fp_offset_v = bx.load(bx.type_i32(), fp_offset_ptr, Align::from_bytes(4).unwrap()); | 
|  | 411 | + | 
|  | 412 | +    let mut use_regs = bx.const_bool(false); | 
|  | 413 | + | 
|  | 414 | +    if num_gp_registers > 0 { | 
|  | 415 | +        let max_offset_val = 48u32 - num_gp_registers * 8; | 
|  | 416 | +        let fits_in_gp = bx.icmp(IntPredicate::IntULE, gp_offset_v, bx.const_u32(max_offset_val)); | 
|  | 417 | +        use_regs = fits_in_gp; | 
|  | 418 | +    } | 
|  | 419 | + | 
|  | 420 | +    if num_fp_registers > 0 { | 
|  | 421 | +        let max_offset_val = 176u32 - num_fp_registers * 16; | 
|  | 422 | +        let fits_in_fp = bx.icmp(IntPredicate::IntULE, fp_offset_v, bx.const_u32(max_offset_val)); | 
|  | 423 | +        use_regs = if num_gp_registers > 0 { bx.and(use_regs, fits_in_fp) } else { fits_in_fp }; | 
|  | 424 | +    } | 
|  | 425 | + | 
|  | 426 | +    let in_reg = bx.append_sibling_block("va_arg.in_reg"); | 
|  | 427 | +    let in_mem = bx.append_sibling_block("va_arg.in_mem"); | 
|  | 428 | +    let end = bx.append_sibling_block("va_arg.end"); | 
|  | 429 | + | 
|  | 430 | +    bx.cond_br(use_regs, in_reg, in_mem); | 
|  | 431 | + | 
|  | 432 | +    // Emit code to load the value if it was passed in a register. | 
|  | 433 | +    bx.switch_to_block(in_reg); | 
|  | 434 | + | 
|  | 435 | +    // AMD64-ABI 3.5.7p5: Step 4. Fetch type from l->reg_save_area with | 
|  | 436 | +    // an offset of l->gp_offset and/or l->fp_offset. This may require | 
|  | 437 | +    // copying to a temporary location in case the parameter is passed | 
|  | 438 | +    // in different register classes or requires an alignment greater | 
|  | 439 | +    // than 8 for general purpose registers and 16 for XMM registers. | 
|  | 440 | +    // | 
|  | 441 | +    // FIXME(llvm): This really results in shameful code when we end up needing to | 
|  | 442 | +    // collect arguments from different places; often what should result in a | 
|  | 443 | +    // simple assembling of a structure from scattered addresses has many more | 
|  | 444 | +    // loads than necessary. Can we clean this up? | 
|  | 445 | +    let reg_save_area_ptr = | 
|  | 446 | +        bx.inbounds_ptradd(va_list_addr, bx.cx.const_usize(2 * unsigned_int_offset + ptr_offset)); | 
|  | 447 | +    let reg_save_area_v = bx.load(bx.type_ptr(), reg_save_area_ptr, dl.pointer_align.abi); | 
|  | 448 | + | 
|  | 449 | +    let reg_addr = match layout.layout.backend_repr() { | 
|  | 450 | +        BackendRepr::Scalar(scalar) => match scalar.primitive() { | 
|  | 451 | +            Primitive::Int(_, _) | Primitive::Pointer(_) => { | 
|  | 452 | +                let reg_addr = bx.inbounds_ptradd(reg_save_area_v, gp_offset_v); | 
|  | 453 | + | 
|  | 454 | +                // Copy into a temporary if the type is more aligned than the register save area. | 
|  | 455 | +                let gp_align = Align::from_bytes(8).unwrap(); | 
|  | 456 | +                copy_to_temporary_if_more_aligned(bx, reg_addr, layout, gp_align) | 
|  | 457 | +            } | 
|  | 458 | +            Primitive::Float(_) => bx.inbounds_ptradd(reg_save_area_v, fp_offset_v), | 
|  | 459 | +        }, | 
|  | 460 | +        BackendRepr::ScalarPair(scalar1, scalar2) => { | 
|  | 461 | +            let ty_lo = bx.cx().scalar_pair_element_backend_type(layout, 0, false); | 
|  | 462 | +            let ty_hi = bx.cx().scalar_pair_element_backend_type(layout, 1, false); | 
|  | 463 | + | 
|  | 464 | +            let align_lo = layout.field(bx.cx, 0).layout.align().abi; | 
|  | 465 | +            let align_hi = layout.field(bx.cx, 1).layout.align().abi; | 
|  | 466 | + | 
|  | 467 | +            match (scalar1.primitive(), scalar2.primitive()) { | 
|  | 468 | +                (Primitive::Float(_), Primitive::Float(_)) => { | 
|  | 469 | +                    // SSE registers are spaced 16 bytes apart in the register save | 
|  | 470 | +                    // area, so we need to collect the two eightbytes together. | 
|  | 471 | +                    // The ABI isn't explicit about this, but it seems reasonable | 
|  | 472 | +                    // to assume that the slots are 16-byte aligned, since the stack is | 
|  | 473 | +                    // naturally 16-byte aligned and the prologue is expected to store | 
|  | 474 | +                    // all the SSE registers to the RSA. | 
|  | 475 | +                    let reg_lo_addr = bx.inbounds_ptradd(reg_save_area_v, fp_offset_v); | 
|  | 476 | +                    let reg_hi_addr = bx.inbounds_ptradd(reg_lo_addr, bx.const_i32(16)); | 
|  | 477 | + | 
|  | 478 | +                    let align = layout.layout.align().abi; | 
|  | 479 | +                    let tmp = bx.alloca(layout.layout.size(), align); | 
|  | 480 | + | 
|  | 481 | +                    let reg_lo = bx.load(ty_lo, reg_lo_addr, align_lo); | 
|  | 482 | +                    let reg_hi = bx.load(ty_hi, reg_hi_addr, align_hi); | 
|  | 483 | + | 
|  | 484 | +                    let offset = scalar1.size(bx.cx).align_to(align_hi).bytes(); | 
|  | 485 | +                    let field0 = tmp; | 
|  | 486 | +                    let field1 = bx.inbounds_ptradd(tmp, bx.const_u32(offset as u32)); | 
|  | 487 | + | 
|  | 488 | +                    bx.store(reg_lo, field0, align); | 
|  | 489 | +                    bx.store(reg_hi, field1, align); | 
|  | 490 | + | 
|  | 491 | +                    tmp | 
|  | 492 | +                } | 
|  | 493 | +                (Primitive::Float(_), _) | (_, Primitive::Float(_)) => { | 
|  | 494 | +                    let gp_addr = bx.inbounds_ptradd(reg_save_area_v, gp_offset_v); | 
|  | 495 | +                    let fp_addr = bx.inbounds_ptradd(reg_save_area_v, fp_offset_v); | 
|  | 496 | + | 
|  | 497 | +                    let (reg_lo_addr, reg_hi_addr) = match scalar1.primitive() { | 
|  | 498 | +                        Primitive::Float(_) => (fp_addr, gp_addr), | 
|  | 499 | +                        Primitive::Int(_, _) | Primitive::Pointer(_) => (gp_addr, fp_addr), | 
|  | 500 | +                    }; | 
|  | 501 | + | 
|  | 502 | +                    let tmp = bx.alloca(layout.layout.size(), layout.layout.align().abi); | 
|  | 503 | + | 
|  | 504 | +                    let reg_lo = bx.load(ty_lo, reg_lo_addr, align_lo); | 
|  | 505 | +                    let reg_hi = bx.load(ty_hi, reg_hi_addr, align_hi); | 
|  | 506 | + | 
|  | 507 | +                    let offset = scalar1.size(bx.cx).align_to(align_hi).bytes(); | 
|  | 508 | +                    let field0 = tmp; | 
|  | 509 | +                    let field1 = bx.inbounds_ptradd(tmp, bx.const_u32(offset as u32)); | 
|  | 510 | + | 
|  | 511 | +                    bx.store(reg_lo, field0, align_lo); | 
|  | 512 | +                    bx.store(reg_hi, field1, align_hi); | 
|  | 513 | + | 
|  | 514 | +                    tmp | 
|  | 515 | +                } | 
|  | 516 | +                (_, _) => { | 
|  | 517 | +                    // Two integer/pointer values are just contiguous in memory. | 
|  | 518 | +                    let reg_addr = bx.inbounds_ptradd(reg_save_area_v, gp_offset_v); | 
|  | 519 | + | 
|  | 520 | +                    // Copy into a temporary if the type is more aligned than the register save area. | 
|  | 521 | +                    let gp_align = Align::from_bytes(8).unwrap(); | 
|  | 522 | +                    copy_to_temporary_if_more_aligned(bx, reg_addr, layout, gp_align) | 
|  | 523 | +                } | 
|  | 524 | +            } | 
|  | 525 | +        } | 
|  | 526 | +        // The previous match on `BackendRepr` means control flow has already escaped. | 
|  | 527 | +        BackendRepr::SimdVector { .. } | BackendRepr::Memory { .. } => unreachable!(), | 
|  | 528 | +    }; | 
|  | 529 | + | 
|  | 530 | +    // AMD64-ABI 3.5.7p5: Step 5. Set: | 
|  | 531 | +    // l->gp_offset = l->gp_offset + num_gp * 8 | 
|  | 532 | +    if num_gp_registers > 0 { | 
|  | 533 | +        let offset = bx.const_u32(num_gp_registers * 8); | 
|  | 534 | +        let sum = bx.add(gp_offset_v, offset); | 
|  | 535 | +        // An alignment of 8 because `__va_list_tag` is 8-aligned and this is its first field. | 
|  | 536 | +        bx.store(sum, gp_offset_ptr, Align::from_bytes(8).unwrap()); | 
|  | 537 | +    } | 
|  | 538 | + | 
|  | 539 | +    // l->fp_offset = l->fp_offset + num_fp * 16. | 
|  | 540 | +    if num_fp_registers > 0 { | 
|  | 541 | +        let offset = bx.const_u32(num_fp_registers * 16); | 
|  | 542 | +        let sum = bx.add(fp_offset_v, offset); | 
|  | 543 | +        bx.store(sum, fp_offset_ptr, Align::from_bytes(4).unwrap()); | 
|  | 544 | +    } | 
|  | 545 | + | 
|  | 546 | +    bx.br(end); | 
|  | 547 | + | 
|  | 548 | +    bx.switch_to_block(in_mem); | 
|  | 549 | +    let mem_addr = x86_64_sysv64_va_arg_from_memory(bx, va_list_addr, layout); | 
|  | 550 | +    bx.br(end); | 
|  | 551 | + | 
|  | 552 | +    bx.switch_to_block(end); | 
|  | 553 | + | 
|  | 554 | +    let val_type = layout.llvm_type(bx); | 
|  | 555 | +    let val_addr = bx.phi(bx.type_ptr(), &[reg_addr, mem_addr], &[in_reg, in_mem]); | 
|  | 556 | + | 
|  | 557 | +    bx.load(val_type, val_addr, layout.align.abi) | 
|  | 558 | +} | 
|  | 559 | + | 
|  | 560 | +/// Copy into a temporary if the type is more aligned than the register save area. | 
|  | 561 | +fn copy_to_temporary_if_more_aligned<'ll, 'tcx>( | 
|  | 562 | +    bx: &mut Builder<'_, 'll, 'tcx>, | 
|  | 563 | +    reg_addr: &'ll Value, | 
|  | 564 | +    layout: TyAndLayout<'tcx, Ty<'tcx>>, | 
|  | 565 | +    src_align: Align, | 
|  | 566 | +) -> &'ll Value { | 
|  | 567 | +    if layout.layout.align.abi > src_align { | 
|  | 568 | +        let tmp = bx.alloca(layout.layout.size(), layout.layout.align().abi); | 
|  | 569 | +        bx.memcpy( | 
|  | 570 | +            tmp, | 
|  | 571 | +            layout.layout.align.abi, | 
|  | 572 | +            reg_addr, | 
|  | 573 | +            src_align, | 
|  | 574 | +            bx.const_u32(layout.layout.size().bytes() as u32), | 
|  | 575 | +            MemFlags::empty(), | 
|  | 576 | +        ); | 
|  | 577 | +        tmp | 
|  | 578 | +    } else { | 
|  | 579 | +        reg_addr | 
|  | 580 | +    } | 
|  | 581 | +} | 
|  | 582 | + | 
|  | 583 | +fn x86_64_sysv64_va_arg_from_memory<'ll, 'tcx>( | 
|  | 584 | +    bx: &mut Builder<'_, 'll, 'tcx>, | 
|  | 585 | +    va_list_addr: &'ll Value, | 
|  | 586 | +    layout: TyAndLayout<'tcx, Ty<'tcx>>, | 
|  | 587 | +) -> &'ll Value { | 
|  | 588 | +    let dl = bx.cx.data_layout(); | 
|  | 589 | + | 
|  | 590 | +    let overflow_arg_area_ptr = bx.inbounds_ptradd(va_list_addr, bx.const_usize(8)); | 
|  | 591 | + | 
|  | 592 | +    let overflow_arg_area_v = bx.load(bx.type_ptr(), overflow_arg_area_ptr, dl.pointer_align.abi); | 
|  | 593 | +    // AMD64-ABI 3.5.7p5: Step 7. Align l->overflow_arg_area upwards to a 16 | 
|  | 594 | +    // byte boundary if alignment needed by type exceeds 8 byte boundary. | 
|  | 595 | +    // It isn't stated explicitly in the standard, but in practice we use | 
|  | 596 | +    // alignment greater than 16 where necessary. | 
|  | 597 | +    if layout.layout.align.abi.bytes() > 8 { | 
|  | 598 | +        unreachable!("all instances of VaArgSafe have an alignment <= 8"); | 
|  | 599 | +    } | 
|  | 600 | + | 
|  | 601 | +    // AMD64-ABI 3.5.7p5: Step 8. Fetch type from l->overflow_arg_area. | 
|  | 602 | +    let mem_addr = overflow_arg_area_v; | 
|  | 603 | + | 
|  | 604 | +    // AMD64-ABI 3.5.7p5: Step 9. Set l->overflow_arg_area to: | 
|  | 605 | +    // l->overflow_arg_area + sizeof(type). | 
|  | 606 | +    // AMD64-ABI 3.5.7p5: Step 10. Align l->overflow_arg_area upwards to | 
|  | 607 | +    // an 8 byte boundary. | 
|  | 608 | +    let size_in_bytes = layout.layout.size().bytes(); | 
|  | 609 | +    let offset = bx.const_i32(size_in_bytes.next_multiple_of(8) as i32); | 
|  | 610 | +    let overflow_arg_area = bx.inbounds_ptradd(overflow_arg_area_v, offset); | 
|  | 611 | +    bx.store(overflow_arg_area, overflow_arg_area_ptr, dl.pointer_align.abi); | 
|  | 612 | + | 
|  | 613 | +    mem_addr | 
|  | 614 | +} | 
|  | 615 | + | 
| 306 | 616 | fn emit_xtensa_va_arg<'ll, 'tcx>( | 
| 307 | 617 |     bx: &mut Builder<'_, 'll, 'tcx>, | 
| 308 | 618 |     list: OperandRef<'tcx, &'ll Value>, | 
| @@ -447,6 +757,8 @@ pub(super) fn emit_va_arg<'ll, 'tcx>( | 
| 447 | 757 |                 AllowHigherAlign::No, | 
| 448 | 758 |             ) | 
| 449 | 759 |         } | 
|  | 760 | +        // This includes `target.is_like_darwin`, which on x86_64 uses the SysV calling convention. | 
|  | 761 | +        "x86_64" => emit_x86_64_sysv64_va_arg(bx, addr, target_ty), | 
| 450 | 762 |         "xtensa" => emit_xtensa_va_arg(bx, addr, target_ty), | 
| 451 | 763 |         // For all other architecture/OS combinations fall back to using | 
| 452 | 764 |         // the LLVM va_arg instruction. | 
|  | 