@@ -203,89 +203,93 @@ define <12 x float> @abp90c12(<12 x float> %a, <12 x float> %b, <12 x float> %c)
203203; CHECK: // %bb.0: // %entry
204204; CHECK-NEXT: // kill: def $s1 killed $s1 def $q1
205205; CHECK-NEXT: // kill: def $s3 killed $s3 def $q3
206- ; CHECK-NEXT: ldr s17, [sp, #40]
207- ; CHECK-NEXT: add x10, sp, #56
208206; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
207+ ; CHECK-NEXT: // kill: def $s2 killed $s2 def $q2
208+ ; CHECK-NEXT: ldr s17, [sp, #32]
209+ ; CHECK-NEXT: // kill: def $s5 killed $s5 def $q5
209210; CHECK-NEXT: add x9, sp, #48
211+ ; CHECK-NEXT: add x10, sp, #64
210212; CHECK-NEXT: mov v1.s[1], v3.s[0]
211- ; CHECK-NEXT: ldr s3, [sp, #32]
212- ; CHECK-NEXT: // kill: def $s2 killed $s2 def $q2
213213; CHECK-NEXT: mov v0.s[1], v2.s[0]
214- ; CHECK-NEXT: ld1 { v17.s }[1], [x10]
215- ; CHECK-NEXT: // kill: def $s5 killed $s5 def $q5
216- ; CHECK-NEXT: ldr s16, [sp, #8]
217214; CHECK-NEXT: // kill: def $s4 killed $s4 def $q4
218- ; CHECK-NEXT: add x10, sp, #24
219- ; CHECK-NEXT: ld1 { v3.s }[1], [x9]
220- ; CHECK-NEXT: add x9, sp, #72
221- ; CHECK-NEXT: // kill: def $s7 killed $s7 def $q7
215+ ; CHECK-NEXT: add x11, sp, #72
216+ ; CHECK-NEXT: ld1 { v17.s }[1], [x9]
217+ ; CHECK-NEXT: ldr s18, [x10]
218+ ; CHECK-NEXT: add x9, sp, #80
219+ ; CHECK-NEXT: add x10, sp, #56
222220; CHECK-NEXT: // kill: def $s6 killed $s6 def $q6
221+ ; CHECK-NEXT: // kill: def $s7 killed $s7 def $q7
222+ ; CHECK-NEXT: ldr s16, [sp, #8]
223+ ; CHECK-NEXT: ldr s3, [sp, #96]
224+ ; CHECK-NEXT: ld1 { v18.s }[1], [x9]
225+ ; CHECK-NEXT: add x9, sp, #88
223226; CHECK-NEXT: ldr s2, [sp]
224- ; CHECK-NEXT: ld1 { v16.s }[1], [x10]
225- ; CHECK-NEXT: add x10, sp, #112
226- ; CHECK-NEXT: ldr s20, [sp, #136]
227227; CHECK-NEXT: mov v1.s[2], v5.s[0]
228- ; CHECK-NEXT: ld1 { v17.s }[2], [x9]
229- ; CHECK-NEXT: add x9, sp, #64
230- ; CHECK-NEXT: ldr s5, [sp, #96]
231- ; CHECK-NEXT: ld1 { v3.s }[2], [x9]
228+ ; CHECK-NEXT: ldr s5, [sp, #40]
232229; CHECK-NEXT: mov v0.s[2], v4.s[0]
233- ; CHECK-NEXT: add x9, sp, #88
234- ; CHECK-NEXT: ldr s4, [sp, #104]
235- ; CHECK-NEXT: ldr s19, [sp, #192]
236230; CHECK-NEXT: ld1 { v5.s }[1], [x10]
237- ; CHECK-NEXT: add x10, sp, #80
238- ; CHECK-NEXT: ld1 { v17.s }[3], [x9]
239- ; CHECK-NEXT: mov v1.s[3], v7.s[0]
240- ; CHECK-NEXT: add x9, sp, #120
241- ; CHECK-NEXT: ld1 { v3.s }[3], [x10]
242- ; CHECK-NEXT: ld1 { v4.s }[1], [x9]
243- ; CHECK-NEXT: ldr s7, [sp, #128]
231+ ; CHECK-NEXT: ldr s19, [x11]
244232; CHECK-NEXT: add x10, sp, #144
233+ ; CHECK-NEXT: zip1 v4.2d, v17.2d, v18.2d
234+ ; CHECK-NEXT: add x11, sp, #160
235+ ; CHECK-NEXT: ldr s18, [sp, #136]
236+ ; CHECK-NEXT: ld1 { v19.s }[1], [x9]
245237; CHECK-NEXT: mov v0.s[3], v6.s[0]
246- ; CHECK-NEXT: add x9, sp, #16
238+ ; CHECK-NEXT: ldr s6, [sp, #128]
239+ ; CHECK-NEXT: mov v1.s[3], v7.s[0]
240+ ; CHECK-NEXT: add x9, sp, #24
241+ ; CHECK-NEXT: ldr s7, [sp, #104]
242+ ; CHECK-NEXT: ld1 { v16.s }[1], [x9]
243+ ; CHECK-NEXT: add x9, sp, #112
244+ ; CHECK-NEXT: ld1 { v6.s }[1], [x10]
245+ ; CHECK-NEXT: zip1 v5.2d, v5.2d, v19.2d
246+ ; CHECK-NEXT: add x10, sp, #120
247+ ; CHECK-NEXT: ld1 { v3.s }[1], [x9]
247248; CHECK-NEXT: ld1 { v7.s }[1], [x10]
248- ; CHECK-NEXT: ld1 { v2.s }[1], [x9]
249- ; CHECK-NEXT: add x9, sp, #160
250- ; CHECK-NEXT: fmul v6.4s, v17.4s, v1.4s
251- ; CHECK-NEXT: fmul v18.4s, v4.4s, v16.4s
252- ; CHECK-NEXT: fmul v16.4s, v5.4s, v16.4s
253- ; CHECK-NEXT: fmul v1.4s, v3.4s, v1.4s
254- ; CHECK-NEXT: add x10, sp, #208
255- ; CHECK-NEXT: ld1 { v7.s }[2], [x9]
256- ; CHECK-NEXT: add x9, sp, #152
257- ; CHECK-NEXT: ld1 { v19.s }[1], [x10]
258- ; CHECK-NEXT: ld1 { v20.s }[1], [x9]
249+ ; CHECK-NEXT: ldr s17, [x11]
259250; CHECK-NEXT: add x9, sp, #176
260- ; CHECK-NEXT: add x10, sp, #184
261- ; CHECK-NEXT: fneg v6.4s, v6.4s
262- ; CHECK-NEXT: fneg v18.4s, v18.4s
263- ; CHECK-NEXT: fmla v16.4s, v2.4s, v4.4s
264- ; CHECK-NEXT: fmla v1.4s, v0.4s, v17.4s
265- ; CHECK-NEXT: ld1 { v7.s }[3], [x9]
266- ; CHECK-NEXT: add x9, sp, #168
267- ; CHECK-NEXT: ld1 { v20.s }[2], [x9]
268- ; CHECK-NEXT: ldr s4, [sp, #200]
251+ ; CHECK-NEXT: add x10, sp, #16
252+ ; CHECK-NEXT: add x11, sp, #168
253+ ; CHECK-NEXT: ld1 { v17.s }[1], [x9]
254+ ; CHECK-NEXT: ld1 { v2.s }[1], [x10]
255+ ; CHECK-NEXT: add x9, sp, #152
256+ ; CHECK-NEXT: fmul v19.4s, v5.4s, v1.4s
257+ ; CHECK-NEXT: fmul v20.4s, v7.4s, v16.4s
258+ ; CHECK-NEXT: fmul v16.4s, v3.4s, v16.4s
259+ ; CHECK-NEXT: fmul v1.4s, v4.4s, v1.4s
260+ ; CHECK-NEXT: ld1 { v18.s }[1], [x9]
261+ ; CHECK-NEXT: ldr s21, [x11]
262+ ; CHECK-NEXT: zip1 v6.2d, v6.2d, v17.2d
263+ ; CHECK-NEXT: ldr s17, [sp, #192]
264+ ; CHECK-NEXT: add x9, sp, #184
265+ ; CHECK-NEXT: add x10, sp, #208
266+ ; CHECK-NEXT: ld1 { v21.s }[1], [x9]
269267; CHECK-NEXT: add x9, sp, #216
270- ; CHECK-NEXT: fmla v6.4s, v0.4s, v3.4s
271- ; CHECK-NEXT: fmla v18.4s, v2.4s, v5.4s
272- ; CHECK-NEXT: ld1 { v4.s }[1], [x9]
273- ; CHECK-NEXT: fsub v0.4s, v7.4s, v1.4s
274- ; CHECK-NEXT: fsub v1.4s, v19.4s, v16.4s
275- ; CHECK-NEXT: ld1 { v20.s }[3], [x10]
276- ; CHECK-NEXT: fadd v2.4s, v4.4s, v18.4s
277- ; CHECK-NEXT: fadd v3.4s, v20.4s, v6.4s
268+ ; CHECK-NEXT: fneg v19.4s, v19.4s
269+ ; CHECK-NEXT: fneg v20.4s, v20.4s
270+ ; CHECK-NEXT: fmla v16.4s, v2.4s, v7.4s
271+ ; CHECK-NEXT: fmla v1.4s, v0.4s, v5.4s
272+ ; CHECK-NEXT: ld1 { v17.s }[1], [x10]
273+ ; CHECK-NEXT: ldr s5, [sp, #200]
274+ ; CHECK-NEXT: zip1 v7.2d, v18.2d, v21.2d
275+ ; CHECK-NEXT: ld1 { v5.s }[1], [x9]
276+ ; CHECK-NEXT: fmla v19.4s, v0.4s, v4.4s
277+ ; CHECK-NEXT: fmla v20.4s, v2.4s, v3.4s
278+ ; CHECK-NEXT: fsub v0.4s, v6.4s, v1.4s
279+ ; CHECK-NEXT: fsub v1.4s, v17.4s, v16.4s
280+ ; CHECK-NEXT: fadd v2.4s, v7.4s, v19.4s
281+ ; CHECK-NEXT: fadd v3.4s, v5.4s, v20.4s
278282; CHECK-NEXT: ext v4.16b, v0.16b, v1.16b, #12
279- ; CHECK-NEXT: ext v5.16b, v3 .16b, v2 .16b, #12
280- ; CHECK-NEXT: trn2 v1.4s, v1.4s, v2 .4s
283+ ; CHECK-NEXT: ext v5.16b, v2 .16b, v3 .16b, #12
284+ ; CHECK-NEXT: trn2 v1.4s, v1.4s, v3 .4s
281285; CHECK-NEXT: ext v4.16b, v0.16b, v4.16b, #12
282- ; CHECK-NEXT: ext v5.16b, v3 .16b, v5.16b, #8
286+ ; CHECK-NEXT: ext v5.16b, v2 .16b, v5.16b, #8
283287; CHECK-NEXT: rev64 v4.4s, v4.4s
284- ; CHECK-NEXT: trn2 v2 .4s, v4.4s, v5.4s
285- ; CHECK-NEXT: zip2 v4.4s, v0.4s, v3 .4s
286- ; CHECK-NEXT: zip1 v0.4s, v0.4s, v3 .4s
287- ; CHECK-NEXT: ext v1.16b, v2 .16b, v1.16b, #8
288- ; CHECK-NEXT: mov v4.d[1], v2 .d[0]
288+ ; CHECK-NEXT: trn2 v3 .4s, v4.4s, v5.4s
289+ ; CHECK-NEXT: zip2 v4.4s, v0.4s, v2 .4s
290+ ; CHECK-NEXT: zip1 v0.4s, v0.4s, v2 .4s
291+ ; CHECK-NEXT: ext v1.16b, v3 .16b, v1.16b, #8
292+ ; CHECK-NEXT: mov v4.d[1], v3 .d[0]
289293; CHECK-NEXT: str q0, [x8]
290294; CHECK-NEXT: stp q4, q1, [x8, #16]
291295; CHECK-NEXT: ret
0 commit comments