From 29380a318c95e9a3c030efda95e38f90ae430619 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Fri, 28 Mar 2025 18:00:15 -0700 Subject: [PATCH] Cranelift: remove block params on critical-edge blocks. When a block has a terminator branch that targets two or more other blocks at the CLIF level, and any of these blocks have two or more predecessors, the edge is a "critical edge" and we split it (insert a new empty block) so that the register allocator has a place to put moves that happen only on that edge. Otherwise, there is no location that works: in the predecessor, code runs no matter which outgoing edge we take; and in the successor, code runs no matter which incoming edge we came from. Currently, when we generate these critical-edge blocks, we insert exactly one instruction: an unconditional branch. We wire up the blockparam dataflow by (i) adding block parameters to the critical-edge block with the same signature as the original target, and (ii) adding all of these arguments to the unconditional branch. In other words, we maintain the original block signature throughout. This is fine and correct, but it has two downsides. The first is a minor loss in compile-time efficiency (more SSA values and block-params to process). The second, more interesting, is that it hinders future work with certain kinds of branches that may define values *on edges*. In particular, this approach prevents exception-handling support: a `try_call` instruction that acts as a terminator branch (with normal-return and exceptional out-edges) defines normal-return values as block-call arguments that are usable on the normal-return edge. Some of these normal-return values may be defined by loads from a return-value area. 
These loads need somewhere to go; they can't go "after the terminator" (then it wouldn't be a terminator), so they go in an edge block; as a result, the block-call for the normal-return needs to use its arguments only in the unconditional branch out of the edge block, not in the initial branch to the edge block. This PR alters the critical-edge blockparam handling to have no block-call args on the branch into the edge block, and use the original values (not the newly defined edge-block blockparams) in the block-call out of the edge block. This will allow these values to be possibly defined in the edge block rather than in the predecessor (the block with the original terminator). This has no functional change today other than some perturbation of regalloc decisions and a possibly slight compile-time speedup. --- cranelift/codegen/src/machinst/blockorder.rs | 1 - cranelift/codegen/src/machinst/lower.rs | 115 ++++++++++-------- .../filetests/isa/aarch64/cold-blocks.clif | 16 +-- .../filetests/isa/riscv64/bitops-float.clif | 58 ++++----- .../filetests/isa/riscv64/cold-blocks.clif | 16 +-- .../filetests/filetests/isa/x64/branches.clif | 74 +++++------ .../filetests/isa/x64/crit-edge.clif | 82 +++++++++++++ ...arefully-sink-loads-in-float-compares.clif | 64 +++++----- tests/disas/epoch-interruption-x86.wat | 50 ++++---- tests/disas/pulley/coremark-1.wat | 30 ++--- 10 files changed, 303 insertions(+), 203 deletions(-) create mode 100644 cranelift/filetests/filetests/isa/x64/crit-edge.clif diff --git a/cranelift/codegen/src/machinst/blockorder.rs b/cranelift/codegen/src/machinst/blockorder.rs index b23fcf4859b1..e0ac62bbfdf6 100644 --- a/cranelift/codegen/src/machinst/blockorder.rs +++ b/cranelift/codegen/src/machinst/blockorder.rs @@ -132,7 +132,6 @@ impl LoweredBlock { } /// The associated out-edge successor, if this is a critical edge. - #[cfg(test)] pub fn out_edge(&self) -> Option { match self { &LoweredBlock::CriticalEdge { succ, .. 
} => Some(succ), diff --git a/cranelift/codegen/src/machinst/lower.rs b/cranelift/codegen/src/machinst/lower.rs index a0055c7e5ca6..cfe7fd9f90bf 100644 --- a/cranelift/codegen/src/machinst/lower.rs +++ b/cranelift/codegen/src/machinst/lower.rs @@ -872,7 +872,7 @@ impl<'func, I: VCodeInst> Lower<'func, I> { self.vcode.end_bb(); } - fn lower_clif_branches>( + fn lower_clif_branch>( &mut self, backend: &B, // Lowered block index: @@ -883,7 +883,7 @@ impl<'func, I: VCodeInst> Lower<'func, I> { targets: &[MachLabel], ) -> CodegenResult<()> { trace!( - "lower_clif_branches: block {} branch {:?} targets {:?}", + "lower_clif_branch: block {} branch {:?} targets {:?}", block, branch, targets, @@ -909,31 +909,17 @@ impl<'func, I: VCodeInst> Lower<'func, I> { } fn lower_branch_blockparam_args(&mut self, block: BlockIndex) { + let mut branch_arg_vregs: SmallVec<[Reg; 16]> = smallvec![]; + // TODO: why not make `block_order` public? for succ_idx in 0..self.vcode.block_order().succ_indices(block).1.len() { - // Avoid immutable borrow by explicitly indexing. - let (opt_inst, succs) = self.vcode.block_order().succ_indices(block); - let inst = opt_inst.expect("lower_branch_blockparam_args called on a critical edge!"); - let succ = succs[succ_idx]; - - // The use of `succ_idx` to index `branch_destination` is valid on the assumption that - // the traversal order defined in `visit_block_succs` mirrors the order returned by - // `branch_destination`. If that assumption is violated, the branch targets returned - // here will not match the clif. 
- let branches = self.f.dfg.insts[inst].branch_destination(&self.f.dfg.jump_tables); - let branch_args = branches[succ_idx].args_slice(&self.f.dfg.value_lists); - - let mut branch_arg_vregs: SmallVec<[Reg; 16]> = smallvec![]; - for &arg in branch_args { - debug_assert!(self.f.dfg.value_is_real(arg)); - let regs = self.put_value_in_regs(arg); - branch_arg_vregs.extend_from_slice(regs.regs()); - } - self.vcode.add_succ(succ, &branch_arg_vregs[..]); + branch_arg_vregs.clear(); + let (succ, args) = self.collect_block_call(block, succ_idx, &mut branch_arg_vregs); + self.vcode.add_succ(succ, args); } } - fn collect_branches_and_targets( + fn collect_branch_and_targets( &self, bindex: BlockIndex, _bb: Block, @@ -945,6 +931,56 @@ impl<'func, I: VCodeInst> Lower<'func, I> { opt_inst } + /// Collect the outgoing block-call arguments for a given edge out + /// of a lowered block. + fn collect_block_call<'a>( + &mut self, + block: BlockIndex, + succ_idx: usize, + buffer: &'a mut SmallVec<[Reg; 16]>, + ) -> (BlockIndex, &'a [Reg]) { + let block_order = self.vcode.block_order(); + let (_, succs) = block_order.succ_indices(block); + let succ = succs[succ_idx]; + let this_lb = block_order.lowered_order()[block.index()]; + let succ_lb = block_order.lowered_order()[succ.index()]; + + let (branch_inst, succ_idx) = match (this_lb, succ_lb) { + (_, LoweredBlock::CriticalEdge { .. }) => { + // The successor is a split-critical-edge block. In this + // case, this block-call has no arguments, and the + // arguments go on the critical edge block's unconditional + // branch instead. + return (succ, &[]); + } + (LoweredBlock::CriticalEdge { pred, succ_idx, .. }, _) => { + // This is a split-critical-edge block. In this case, our + // block-call has the arguments that in the CLIF appear in + // the predecessor's branch to this edge. 
+ let branch_inst = self.f.layout.last_inst(pred).unwrap(); + (branch_inst, succ_idx as usize) + } + + (this, _) => { + let block = this.orig_block().unwrap(); + // Ordinary block, with an ordinary block as + // successor. Take the arguments from the branch. + let branch_inst = self.f.layout.last_inst(block).unwrap(); + (branch_inst, succ_idx) + } + }; + + let block_call = + self.f.dfg.insts[branch_inst].branch_destination(&self.f.dfg.jump_tables)[succ_idx]; + let args = block_call.args_slice(&self.f.dfg.value_lists); + for &arg in args { + debug_assert!(self.f.dfg.value_is_real(arg)); + let regs = self.put_value_in_regs(arg); + buffer.extend_from_slice(regs.regs()); + } + (succ, &buffer[..]) + } + /// Lower the function. pub fn lower>( mut self, @@ -981,39 +1017,22 @@ impl<'func, I: VCodeInst> Lower<'func, I> { // Lower the block body in reverse order (see comment in // `lower_clif_block()` for rationale). - // End branches. + // End branch. if let Some(bb) = lb.orig_block() { - if let Some(branch) = self.collect_branches_and_targets(bindex, bb, &mut targets) { - self.lower_clif_branches(backend, bindex, bb, branch, &targets)?; + if let Some(branch) = self.collect_branch_and_targets(bindex, bb, &mut targets) { + self.lower_clif_branch(backend, bindex, bb, branch, &targets)?; self.finish_ir_inst(self.srcloc(branch)); } } else { // If no orig block, this must be a pure edge block; - // get the successor and emit a jump. Add block params - // according to the one successor, and pass them - // through; note that the successor must have an - // original block. 
- let (_, succs) = self.vcode.block_order().succ_indices(bindex); - let succ = succs[0]; - - let orig_succ = lowered_order[succ.index()]; - let orig_succ = orig_succ - .orig_block() - .expect("Edge block succ must be body block"); - - let mut branch_arg_vregs: SmallVec<[Reg; 16]> = smallvec![]; - for ty in self.f.dfg.block_param_types(orig_succ) { - let regs = self.vregs.alloc(ty)?; - for ® in regs.regs() { - branch_arg_vregs.push(reg); - let vreg = reg.to_virtual_reg().unwrap(); - self.vcode.add_block_param(vreg); - } - } - self.vcode.add_succ(succ, &branch_arg_vregs[..]); - + // get the successor and emit a jump. This block has + // no block params; and this jump's block-call args + // will be filled in by + // `lower_branch_blockparam_args`. + let succ = self.vcode.block_order().succ_indices(bindex).1[0]; self.emit(I::gen_jump(MachLabel::from_block(succ))); self.finish_ir_inst(Default::default()); + self.lower_branch_blockparam_args(bindex); } // Original block body. diff --git a/cranelift/filetests/filetests/isa/aarch64/cold-blocks.clif b/cranelift/filetests/filetests/isa/aarch64/cold-blocks.clif index 5a377557a9ba..2bc2781eed90 100644 --- a/cranelift/filetests/filetests/isa/aarch64/cold-blocks.clif +++ b/cranelift/filetests/filetests/isa/aarch64/cold-blocks.clif @@ -16,8 +16,8 @@ block2: ; VCode: ; block0: -; mov w5, w0 -; cbnz x5, label1 ; b label2 +; mov w4, w0 +; cbnz x4, label1 ; b label2 ; block1: ; b label3 ; block2: @@ -28,8 +28,8 @@ block2: ; ; Disassembled: ; block0: ; offset 0x0 -; mov w5, w0 -; cbnz x5, #0xc +; mov w4, w0 +; cbnz x4, #0xc ; block1: ; offset 0x8 ; mov w0, #0x61 ; block2: ; offset 0xc @@ -49,8 +49,8 @@ block2 cold: ; VCode: ; block0: -; mov w5, w0 -; cbnz x5, label1 ; b label2 +; mov w4, w0 +; cbnz x4, label1 ; b label2 ; block1: ; b label3 ; block3: @@ -61,8 +61,8 @@ block2 cold: ; ; Disassembled: ; block0: ; offset 0x0 -; mov w5, w0 -; cbz x5, #0xc +; mov w4, w0 +; cbz x4, #0xc ; block1: ; offset 0x8 ; ret ; block2: ; offset 0xc 
diff --git a/cranelift/filetests/filetests/isa/riscv64/bitops-float.clif b/cranelift/filetests/filetests/isa/riscv64/bitops-float.clif index a302407c6dfc..649dbc6c5964 100644 --- a/cranelift/filetests/filetests/isa/riscv64/bitops-float.clif +++ b/cranelift/filetests/filetests/isa/riscv64/bitops-float.clif @@ -22,19 +22,19 @@ block1(v4: f32): ; VCode: ; block0: ; li a0,0 -; fmv.w.x fa1,zero -; fmv.x.w a5,fa1 -; not a1,a5 -; fmv.w.x fa3,a1 -; fmv.x.w a4,fa3 -; fmv.x.w a1,fa3 -; or a2,a4,a1 -; fmv.w.x fa2,a2 -; br_table a0,[MachLabel(1),MachLabel(2)]##tmp1=a2,tmp2=a1 +; fmv.w.x fa5,zero +; fmv.x.w a3,fa5 +; not a5,a3 +; fmv.w.x fa1,a5 +; fmv.x.w a2,fa1 +; fmv.x.w a4,fa1 +; or a1,a2,a4 +; fmv.w.x fa2,a1 +; br_table a0,[MachLabel(1),MachLabel(2)]##tmp1=a4,tmp2=a5 ; block1: ; j label3 ; block2: -; fmv.d fa2,fa1 +; fmv.d fa2,fa5 ; j label3 ; block3: ; ret @@ -42,30 +42,30 @@ block1(v4: f32): ; Disassembled: ; block0: ; offset 0x0 ; mv a0, zero -; fmv.w.x fa1, zero -; fmv.x.w a5, fa1 -; not a1, a5 -; fmv.w.x fa3, a1 -; fmv.x.w a4, fa3 -; fmv.x.w a1, fa3 -; or a2, a4, a1 -; fmv.w.x fa2, a2 +; fmv.w.x fa5, zero +; fmv.x.w a3, fa5 +; not a5, a3 +; fmv.w.x fa1, a5 +; fmv.x.w a2, fa1 +; fmv.x.w a4, fa1 +; or a1, a2, a4 +; fmv.w.x fa2, a1 ; slli t6, a0, 0x20 ; srli t6, t6, 0x20 -; addi a1, zero, 1 -; bltu t6, a1, 0xc -; auipc a1, 0 -; jalr zero, a1, 0x28 -; auipc a2, 0 -; slli a1, t6, 3 -; add a2, a2, a1 -; jalr zero, a2, 0x10 -; auipc a1, 0 -; jalr zero, a1, 0xc +; addi a5, zero, 1 +; bltu t6, a5, 0xc +; auipc a5, 0 +; jalr zero, a5, 0x28 +; auipc a4, 0 +; slli a5, t6, 3 +; add a4, a4, a5 +; jalr zero, a4, 0x10 +; auipc a5, 0 +; jalr zero, a5, 0xc ; block1: ; offset 0x54 ; j 8 ; block2: ; offset 0x58 -; fmv.d fa2, fa1 +; fmv.d fa2, fa5 ; block3: ; offset 0x5c ; ret diff --git a/cranelift/filetests/filetests/isa/riscv64/cold-blocks.clif b/cranelift/filetests/filetests/isa/riscv64/cold-blocks.clif index 98db409e8a61..7b6fa896f8fa 100644 --- 
a/cranelift/filetests/filetests/isa/riscv64/cold-blocks.clif +++ b/cranelift/filetests/filetests/isa/riscv64/cold-blocks.clif @@ -16,8 +16,8 @@ block2: ; VCode: ; block0: -; sext.w a5,a0 -; bne a5,zero,taken(label1),not_taken(label2) +; sext.w a4,a0 +; bne a4,zero,taken(label1),not_taken(label2) ; block1: ; j label3 ; block2: @@ -28,8 +28,8 @@ block2: ; ; Disassembled: ; block0: ; offset 0x0 -; sext.w a5, a0 -; bnez a5, 8 +; sext.w a4, a0 +; bnez a4, 8 ; block1: ; offset 0x8 ; addi a0, zero, 0x61 ; block2: ; offset 0xc @@ -49,8 +49,8 @@ block2 cold: ; VCode: ; block0: -; sext.w a5,a0 -; bne a5,zero,taken(label1),not_taken(label2) +; sext.w a4,a0 +; bne a4,zero,taken(label1),not_taken(label2) ; block1: ; j label3 ; block3: @@ -61,8 +61,8 @@ block2 cold: ; ; Disassembled: ; block0: ; offset 0x0 -; sext.w a5, a0 -; beqz a5, 8 +; sext.w a4, a0 +; beqz a4, 8 ; block1: ; offset 0x8 ; ret ; block2: ; offset 0xc diff --git a/cranelift/filetests/filetests/isa/x64/branches.clif b/cranelift/filetests/filetests/isa/x64/branches.clif index 061b43b56ba4..545fd4b99e96 100644 --- a/cranelift/filetests/filetests/isa/x64/branches.clif +++ b/cranelift/filetests/filetests/isa/x64/branches.clif @@ -1039,28 +1039,28 @@ block1(v5: i32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; movl $1, %r10d -; movl $2, %r11d -; movl $3, %esi +; movl $1, %r8d +; movl $2, %ecx +; movl $3, %edx ; movl $4, %eax -; movl $4, %r8d -; movl %edi, %r9d -; cmpl %r8d, %r9d -; cmovbl %r9d, %r8d, %r8d -; br_table %r8, %rdi, %rcx +; movl $4, %esi +; movl %edi, %edi +; cmpl %esi, %edi +; cmovbl %edi, %esi, %esi +; br_table %rsi, %r10, %r9 ; block1: ; jmp label6 ; block2: -; movq %r10, %rax +; movq %r8, %rax ; jmp label6 ; block3: -; movq %r11, %rax +; movq %rcx, %rax ; jmp label6 ; block4: -; movq %r11, %rax +; movq %rcx, %rax ; jmp label6 ; block5: -; movq %rsi, %rax +; movq %rdx, %rax ; jmp label6 ; block6: ; movq %rbp, %rsp @@ -1072,18 +1072,18 @@ block1(v5: i32): ; pushq %rbp ; movq %rsp, %rbp ; block1: ; offset 
0x4 -; movl $1, %r10d -; movl $2, %r11d -; movl $3, %esi +; movl $1, %r8d +; movl $2, %ecx +; movl $3, %edx ; movl $4, %eax -; movl $4, %r8d -; movl %edi, %r9d -; cmpl %r8d, %r9d -; cmovbl %r9d, %r8d -; leaq 9(%rip), %rdi -; movslq (%rdi, %r8, 4), %rcx -; addq %rcx, %rdi -; jmpq *%rdi +; movl $4, %esi +; movl %edi, %edi +; cmpl %esi, %edi +; cmovbl %edi, %esi +; leaq 0xa(%rip), %r10 +; movslq (%r10, %rsi, 4), %r9 +; addq %r9, %r10 +; jmpq *%r10 ; sbbl %eax, (%rax) ; addb %al, (%rax) ; andl %eax, (%rax) @@ -1094,20 +1094,20 @@ block1(v5: i32): ; addb %al, (%rax) ; xorb $0, %al ; addb %al, (%rax) -; block2: ; offset 0x4e -; jmp 0x6e -; block3: ; offset 0x53 -; movq %r10, %rax -; jmp 0x6e -; block4: ; offset 0x5b -; movq %r11, %rax -; jmp 0x6e -; block5: ; offset 0x63 -; movq %r11, %rax -; jmp 0x6e -; block6: ; offset 0x6b -; movq %rsi, %rax -; block7: ; offset 0x6e +; block2: ; offset 0x4a +; jmp 0x6a +; block3: ; offset 0x4f +; movq %r8, %rax +; jmp 0x6a +; block4: ; offset 0x57 +; movq %rcx, %rax +; jmp 0x6a +; block5: ; offset 0x5f +; movq %rcx, %rax +; jmp 0x6a +; block6: ; offset 0x67 +; movq %rdx, %rax +; block7: ; offset 0x6a ; movq %rbp, %rsp ; popq %rbp ; retq diff --git a/cranelift/filetests/filetests/isa/x64/crit-edge.clif b/cranelift/filetests/filetests/isa/x64/crit-edge.clif new file mode 100644 index 000000000000..99cecae46beb --- /dev/null +++ b/cranelift/filetests/filetests/isa/x64/crit-edge.clif @@ -0,0 +1,82 @@ +test compile precise-output +target x86_64 + +function %f(i32) -> i32 { + block0(v0: i32): + v1 = iadd_imm.i32 v0, 1 + brif v0, block1(v0), block2(v1) + + block1(v2: i32): + brif v2, block3(v0), block4(v1) + + block2(v3: i32): + brif v3, block3(v1), block4(v0) + + block3(v4: i32): + return v4 + + block4(v5: i32): + return v5 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; lea 1(%rdi), %eax +; testl %edi, %edi +; jnz label4; j label1 +; block1: +; testl %eax, %eax +; jnz label2; j label3 +; block2: +; jmp label8 +; block3: +; 
movq %rdi, %rax +; jmp label7 +; block4: +; testl %edi, %edi +; jnz label5; j label6 +; block5: +; movq %rdi, %rax +; jmp label8 +; block6: +; jmp label7 +; block7: +; movq %rbp, %rsp +; popq %rbp +; ret +; block8: +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; leal 1(%rdi), %eax +; testl %edi, %edi +; jne 0x1f +; block2: ; offset 0xf +; testl %eax, %eax +; jne 0x34 +; block3: ; offset 0x17 +; movq %rdi, %rax +; jmp 0x2f +; block4: ; offset 0x1f +; testl %edi, %edi +; je 0x2f +; block5: ; offset 0x27 +; movq %rdi, %rax +; jmp 0x34 +; block6: ; offset 0x2f +; movq %rbp, %rsp +; popq %rbp +; retq +; block7: ; offset 0x34 +; movq %rbp, %rsp +; popq %rbp +; retq + diff --git a/cranelift/filetests/filetests/isa/x64/very-carefully-sink-loads-in-float-compares.clif b/cranelift/filetests/filetests/isa/x64/very-carefully-sink-loads-in-float-compares.clif index 2e0769262347..d8564847902a 100644 --- a/cranelift/filetests/filetests/isa/x64/very-carefully-sink-loads-in-float-compares.clif +++ b/cranelift/filetests/filetests/isa/x64/very-carefully-sink-loads-in-float-compares.clif @@ -184,8 +184,8 @@ block1(v8: i32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; movss 0(%rdi), %xmm1 -; ucomiss %xmm1, %xmm0 +; movss 0(%rdi), %xmm6 +; ucomiss %xmm6, %xmm0 ; jp,nz label2; j label1 ; block1: ; movq %rsi, %rax @@ -203,8 +203,8 @@ block1(v8: i32): ; pushq %rbp ; movq %rsp, %rbp ; block1: ; offset 0x4 -; movss (%rdi), %xmm1 -; ucomiss %xmm1, %xmm0 +; movss (%rdi), %xmm6 +; ucomiss %xmm6, %xmm0 ; jp 0x1f ; jne 0x1f ; block2: ; offset 0x17 @@ -230,8 +230,8 @@ block1(v8: i32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; movss 0(%rdi), %xmm1 -; ucomiss %xmm0, %xmm1 +; movss 0(%rdi), %xmm6 +; ucomiss %xmm0, %xmm6 ; jp,nz label2; j label1 ; block1: ; movq %rsi, %rax @@ -249,8 +249,8 @@ block1(v8: i32): ; pushq %rbp ; movq %rsp, %rbp ; block1: ; offset 0x4 -; movss (%rdi), %xmm1 -; ucomiss %xmm0, 
%xmm1 +; movss (%rdi), %xmm6 +; ucomiss %xmm0, %xmm6 ; jp 0x1f ; jne 0x1f ; block2: ; offset 0x17 @@ -278,8 +278,8 @@ block2(v7: i32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; movss 0(%rdi), %xmm3 -; ucomiss %xmm3, %xmm0 +; movss 0(%rdi), %xmm7 +; ucomiss %xmm7, %xmm0 ; jp,nz label2; j label1 ; block1: ; movq %rsi, %rax @@ -288,7 +288,7 @@ block2(v7: i32): ; movq %rdx, %rax ; jmp label3 ; block3: -; ucomiss %xmm3, %xmm0 +; ucomiss %xmm7, %xmm0 ; jp,nz label5; j label4 ; block4: ; jmp label6 @@ -305,8 +305,8 @@ block2(v7: i32): ; pushq %rbp ; movq %rsp, %rbp ; block1: ; offset 0x4 -; movss (%rdi), %xmm3 -; ucomiss %xmm3, %xmm0 +; movss (%rdi), %xmm7 +; ucomiss %xmm7, %xmm0 ; jp 0x1f ; jne 0x1f ; block2: ; offset 0x17 @@ -315,7 +315,7 @@ block2(v7: i32): ; block3: ; offset 0x1f ; movq %rdx, %rax ; block4: ; offset 0x22 -; ucomiss %xmm3, %xmm0 +; ucomiss %xmm7, %xmm0 ; jp 0x31 ; je 0x34 ; block5: ; offset 0x31 @@ -340,8 +340,8 @@ block2(v7: i32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; movss 0(%rdi), %xmm3 -; ucomiss %xmm0, %xmm3 +; movss 0(%rdi), %xmm7 +; ucomiss %xmm0, %xmm7 ; jp,nz label2; j label1 ; block1: ; movq %rsi, %rax @@ -350,7 +350,7 @@ block2(v7: i32): ; movq %rdx, %rax ; jmp label3 ; block3: -; ucomiss %xmm0, %xmm3 +; ucomiss %xmm0, %xmm7 ; jp,nz label5; j label4 ; block4: ; jmp label6 @@ -367,8 +367,8 @@ block2(v7: i32): ; pushq %rbp ; movq %rsp, %rbp ; block1: ; offset 0x4 -; movss (%rdi), %xmm3 -; ucomiss %xmm0, %xmm3 +; movss (%rdi), %xmm7 +; ucomiss %xmm0, %xmm7 ; jp 0x1f ; jne 0x1f ; block2: ; offset 0x17 @@ -377,7 +377,7 @@ block2(v7: i32): ; block3: ; offset 0x1f ; movq %rdx, %rax ; block4: ; offset 0x22 -; ucomiss %xmm0, %xmm3 +; ucomiss %xmm0, %xmm7 ; jp 0x31 ; je 0x34 ; block5: ; offset 0x31 @@ -401,12 +401,12 @@ block1(v7: i32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; movss 0(%rdi), %xmm2 -; ucomiss %xmm2, %xmm0 +; movss 0(%rdi), %xmm1 +; ucomiss %xmm1, %xmm0 ; movq %rsi, %rax ; cmovpl %edx, %eax, %eax ; cmovnzl %edx, %eax, %eax -; 
ucomiss %xmm2, %xmm0 +; ucomiss %xmm1, %xmm0 ; jp,nz label2; j label1 ; block1: ; jmp label3 @@ -423,12 +423,12 @@ block1(v7: i32): ; pushq %rbp ; movq %rsp, %rbp ; block1: ; offset 0x4 -; movss (%rdi), %xmm2 -; ucomiss %xmm2, %xmm0 +; movss (%rdi), %xmm1 +; ucomiss %xmm1, %xmm0 ; movq %rsi, %rax ; cmovpl %edx, %eax ; cmovnel %edx, %eax -; ucomiss %xmm2, %xmm0 +; ucomiss %xmm1, %xmm0 ; jp 0x23 ; je 0x26 ; block2: ; offset 0x23 @@ -452,12 +452,12 @@ block1(v7: i32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; movss 0(%rdi), %xmm2 -; ucomiss %xmm0, %xmm2 +; movss 0(%rdi), %xmm1 +; ucomiss %xmm0, %xmm1 ; movq %rsi, %rax ; cmovpl %edx, %eax, %eax ; cmovnzl %edx, %eax, %eax -; ucomiss %xmm0, %xmm2 +; ucomiss %xmm0, %xmm1 ; jp,nz label2; j label1 ; block1: ; jmp label3 @@ -474,12 +474,12 @@ block1(v7: i32): ; pushq %rbp ; movq %rsp, %rbp ; block1: ; offset 0x4 -; movss (%rdi), %xmm2 -; ucomiss %xmm0, %xmm2 +; movss (%rdi), %xmm1 +; ucomiss %xmm0, %xmm1 ; movq %rsi, %rax ; cmovpl %edx, %eax ; cmovnel %edx, %eax -; ucomiss %xmm0, %xmm2 +; ucomiss %xmm0, %xmm1 ; jp 0x23 ; je 0x26 ; block2: ; offset 0x23 diff --git a/tests/disas/epoch-interruption-x86.wat b/tests/disas/epoch-interruption-x86.wat index 2448c0ae5f01..d44f25844205 100644 --- a/tests/disas/epoch-interruption-x86.wat +++ b/tests/disas/epoch-interruption-x86.wat @@ -11,29 +11,29 @@ ;; movq 0x10(%r10), %r10 ;; addq $0x30, %r10 ;; cmpq %rsp, %r10 -;; ja 0x80 +;; ja 0x7e ;; 19: subq $0x20, %rsp -;; movq %rbx, (%rsp) -;; movq %r12, 8(%rsp) -;; movq %r13, 0x10(%rsp) -;; movq 0x20(%rdi), %r12 -;; movq (%r12), %r9 -;; movq 8(%rdi), %rbx -;; movq %rdi, %r13 -;; movq 8(%rbx), %rax -;; cmpq %rax, %r9 -;; jae 0x59 -;; 47: movq (%r12), %rdi -;; cmpq %rax, %rdi -;; jae 0x66 -;; jmp 0x47 -;; 59: movq %r13, %rdi -;; callq 0x107 -;; jmp 0x47 -;; 66: movq 8(%rbx), %rax -;; cmpq %rax, %rdi -;; jb 0x47 -;; 73: movq %r13, %rdi -;; callq 0x107 -;; jmp 0x47 -;; 80: ud2 +;; movq %r13, (%rsp) +;; movq %r14, 8(%rsp) +;; movq %r15, 
0x10(%rsp) +;; movq 0x20(%rdi), %r14 +;; movq (%r14), %rcx +;; movq 8(%rdi), %r13 +;; movq %rdi, %r15 +;; movq 8(%r13), %rax +;; cmpq %rax, %rcx +;; jae 0x57 +;; 46: movq (%r14), %r11 +;; cmpq %rax, %r11 +;; jae 0x64 +;; jmp 0x46 +;; 57: movq %r15, %rdi +;; callq 0x105 +;; jmp 0x46 +;; 64: movq 8(%r13), %rax +;; cmpq %rax, %r11 +;; jb 0x46 +;; 71: movq %r15, %rdi +;; callq 0x105 +;; jmp 0x46 +;; 7e: ud2 diff --git a/tests/disas/pulley/coremark-1.wat b/tests/disas/pulley/coremark-1.wat index dff1415ea479..c568c651df30 100644 --- a/tests/disas/pulley/coremark-1.wat +++ b/tests/disas/pulley/coremark-1.wat @@ -70,28 +70,28 @@ (func $other) ) ;; wasm[0]::function[0]: -;; push_frame_save 16, x16 +;; push_frame_save 16, x26 ;; xzero x6 -;; xload64le_o32 x11, x0, 80 -;; xload64le_o32 x13, x0, 88 -;; xload16le_u32_g32 x12, x11, x13, x2, 0 -;; xload16le_u32_g32 x13, x11, x13, x3, 0 +;; xload64le_o32 x7, x0, 80 +;; xload64le_o32 x9, x0, 88 +;; xload16le_u32_g32 x8, x7, x9, x2, 0 +;; xload16le_u32_g32 x9, x7, x9, x3, 0 ;; xsub32_u8 x4, x4, 1 -;; xmul32 x12, x12, x13 -;; xshr32_u_u6 x13, x12, 2 -;; xband32_s8 x13, x13, 15 -;; xshr32_u_u6 x12, x12, 5 -;; xband32_s8 x12, x12, 127 -;; xmadd32 x6, x13, x12, x6 -;; xmov x16, x6 +;; xmul32 x8, x8, x9 +;; xshr32_u_u6 x9, x8, 2 +;; xband32_s8 x9, x9, 15 +;; xshr32_u_u6 x8, x8, 5 +;; xband32_s8 x8, x8, 127 +;; xmadd32 x6, x9, x8, x6 +;; xmov x26, x6 ;; xadd32 x2, x2, x5 ;; xadd32_u8 x3, x3, 2 ;; br_if_not32 x4, 0xe // target = 0x53 -;; 4b: xmov x6, x16 +;; 4b: xmov x6, x26 ;; jump -0x40 // target = 0xe ;; 53: call2 x0, x0, 0x10 // target = 0x63 -;; xmov x0, x16 -;; pop_frame_restore 16, x16 +;; xmov x0, x26 +;; pop_frame_restore 16, x26 ;; ret ;; ;; wasm[0]::function[1]::other: