Skip to content

Commit 6d6df6b

Browse files
committed
[CIR][CIRGen][Builtin][X86] Lower avx512 scatter intrinsics
1 parent aed448e commit 6d6df6b

File tree

3 files changed

+505
-2
lines changed

3 files changed

+505
-2
lines changed

clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp

Lines changed: 93 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -735,8 +735,99 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned BuiltinID,
735735
case X86::BI__builtin_ia32_scattersiv4sf:
736736
case X86::BI__builtin_ia32_scattersiv4si:
737737
case X86::BI__builtin_ia32_scattersiv8sf:
738-
case X86::BI__builtin_ia32_scattersiv8si:
739-
llvm_unreachable("scattersiv8df NYI");
738+
case X86::BI__builtin_ia32_scattersiv8si: {
739+
740+
llvm::StringRef intrinsicName;
741+
742+
switch (BuiltinID) {
743+
default:
744+
llvm_unreachable("Unexpected builtin");
745+
case X86::BI__builtin_ia32_scattersiv8df:
746+
intrinsicName = "x86.avx512.mask.scatter.dpd.512";
747+
break;
748+
case X86::BI__builtin_ia32_scattersiv16sf:
749+
intrinsicName = "x86.avx512.mask.scatter.dps.512";
750+
break;
751+
case X86::BI__builtin_ia32_scatterdiv8df:
752+
intrinsicName = "x86.avx512.mask.scatter.qpd.512";
753+
break;
754+
case X86::BI__builtin_ia32_scatterdiv16sf:
755+
intrinsicName = "x86.avx512.mask.scatter.qps.512";
756+
break;
757+
case X86::BI__builtin_ia32_scattersiv8di:
758+
intrinsicName = "x86.avx512.mask.scatter.dpq.512";
759+
break;
760+
case X86::BI__builtin_ia32_scattersiv16si:
761+
intrinsicName = "x86.avx512.mask.scatter.dpi.512";
762+
break;
763+
case X86::BI__builtin_ia32_scatterdiv8di:
764+
intrinsicName = "x86.avx512.mask.scatter.qpq.512";
765+
break;
766+
case X86::BI__builtin_ia32_scatterdiv16si:
767+
intrinsicName = "x86.avx512.mask.scatter.qpi.512";
768+
break;
769+
case X86::BI__builtin_ia32_scatterdiv2df:
770+
intrinsicName = "x86.avx512.mask.scatterdiv2.df";
771+
break;
772+
case X86::BI__builtin_ia32_scatterdiv2di:
773+
intrinsicName = "x86.avx512.mask.scatterdiv2.di";
774+
break;
775+
case X86::BI__builtin_ia32_scatterdiv4df:
776+
intrinsicName = "x86.avx512.mask.scatterdiv4.df";
777+
break;
778+
case X86::BI__builtin_ia32_scatterdiv4di:
779+
intrinsicName = "x86.avx512.mask.scatterdiv4.di";
780+
break;
781+
case X86::BI__builtin_ia32_scatterdiv4sf:
782+
intrinsicName = "x86.avx512.mask.scatterdiv4.sf";
783+
break;
784+
case X86::BI__builtin_ia32_scatterdiv4si:
785+
intrinsicName = "x86.avx512.mask.scatterdiv4.si";
786+
break;
787+
case X86::BI__builtin_ia32_scatterdiv8sf:
788+
intrinsicName = "x86.avx512.mask.scatterdiv8.sf";
789+
break;
790+
case X86::BI__builtin_ia32_scatterdiv8si:
791+
intrinsicName = "x86.avx512.mask.scatterdiv8.si";
792+
break;
793+
case X86::BI__builtin_ia32_scattersiv2df:
794+
intrinsicName = "x86.avx512.mask.scattersiv2.df";
795+
break;
796+
case X86::BI__builtin_ia32_scattersiv2di:
797+
intrinsicName = "x86.avx512.mask.scattersiv2.di";
798+
break;
799+
case X86::BI__builtin_ia32_scattersiv4df:
800+
intrinsicName = "x86.avx512.mask.scattersiv4.df";
801+
break;
802+
case X86::BI__builtin_ia32_scattersiv4di:
803+
intrinsicName = "x86.avx512.mask.scattersiv4.di";
804+
break;
805+
case X86::BI__builtin_ia32_scattersiv4sf:
806+
intrinsicName = "x86.avx512.mask.scattersiv4.sf";
807+
break;
808+
case X86::BI__builtin_ia32_scattersiv4si:
809+
intrinsicName = "x86.avx512.mask.scattersiv4.si";
810+
break;
811+
case X86::BI__builtin_ia32_scattersiv8sf:
812+
intrinsicName = "x86.avx512.mask.scattersiv8.sf";
813+
break;
814+
case X86::BI__builtin_ia32_scattersiv8si:
815+
intrinsicName = "x86.avx512.mask.scattersiv8.si";
816+
break;
817+
}
818+
819+
unsigned minElts =
820+
std::min(cast<cir::VectorType>(Ops[2].getType()).getSize(),
821+
cast<cir::VectorType>(Ops[3].getType()).getSize());
822+
Ops[1] = getMaskVecValue(*this, Ops[1], minElts, getLoc(E->getExprLoc()));
823+
824+
return builder
825+
.create<cir::LLVMIntrinsicCallOp>(
826+
getLoc(E->getExprLoc()), builder.getStringAttr(intrinsicName.str()),
827+
builder.getVoidTy(), Ops)
828+
.getResult();
829+
}
830+
740831
case X86::BI__builtin_ia32_vextractf128_pd256:
741832
case X86::BI__builtin_ia32_vextractf128_ps256:
742833
case X86::BI__builtin_ia32_vextractf128_si256:

clang/test/CIR/CodeGen/X86/avx512f-builtins.c

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -337,3 +337,128 @@ __m512i test_mm512_maskz_expandloadu_epi32(__mmask16 __U, void const *__P) {
337337
// LLVM: @llvm.masked.expandload.v16i32(ptr %{{.*}}, <16 x i1> %{{.*}}, <16 x i32> %{{.*}})
338338
return _mm512_maskz_expandloadu_epi32(__U, __P);
339339
}
340+
void test_mm512_i32scatter_pd(void *__addr, __m256i __index, __m512d __v1) {
341+
// CIR-LABEL: test_mm512_i32scatter_pd
342+
// CIR: cir.llvm.intrinsic "x86.avx512.mask.scatter.dpd.512"
343+
344+
// LLVM-LABEL: test_mm512_i32scatter_pd
345+
// LLVM: @llvm.x86.avx512.mask.scatter.dpd.512
346+
return _mm512_i32scatter_pd(__addr, __index, __v1, 2);
347+
}
348+
349+
void test_mm512_mask_i32scatter_pd(void *__addr, __mmask8 __mask, __m256i __index, __m512d __v1) {
350+
// CIR-LABEL: test_mm512_mask_i32scatter_pd
351+
// CIR: cir.llvm.intrinsic "x86.avx512.mask.scatter.dpd.512"
352+
353+
// LLVM-LABEL: test_mm512_mask_i32scatter_pd
354+
// LLVM: @llvm.x86.avx512.mask.scatter.dpd.512
355+
return _mm512_mask_i32scatter_pd(__addr, __mask, __index, __v1, 2);
356+
}
357+
358+
void test_mm512_i32scatter_ps(void *__addr, __m512i __index, __m512 __v1) {
359+
// CIR-LABEL: test_mm512_i32scatter_ps
360+
// CIR: cir.llvm.intrinsic "x86.avx512.mask.scatter.dps.512"
361+
362+
// LLVM-LABEL: test_mm512_i32scatter_ps
363+
// LLVM: @llvm.x86.avx512.mask.scatter.dps.512
364+
return _mm512_i32scatter_ps(__addr, __index, __v1, 2);
365+
}
366+
367+
void test_mm512_mask_i32scatter_ps(void *__addr, __mmask16 __mask, __m512i __index, __m512 __v1) {
368+
// CIR-LABEL: test_mm512_mask_i32scatter_ps
369+
// CIR: cir.llvm.intrinsic "x86.avx512.mask.scatter.dps.512"
370+
371+
// LLVM-LABEL: test_mm512_mask_i32scatter_ps
372+
// LLVM: @llvm.x86.avx512.mask.scatter.dps.512
373+
return _mm512_mask_i32scatter_ps(__addr, __mask, __index, __v1, 2);
374+
}
375+
376+
void test_mm512_i64scatter_pd(void *__addr, __m512i __index, __m512d __v1) {
377+
// CIR-LABEL: test_mm512_i64scatter_pd
378+
// CIR: cir.llvm.intrinsic "x86.avx512.mask.scatter.qpd.512"
379+
380+
// LLVM-LABEL: test_mm512_i64scatter_pd
381+
// LLVM: @llvm.x86.avx512.mask.scatter.qpd.512
382+
return _mm512_i64scatter_pd(__addr, __index, __v1, 2);
383+
}
384+
385+
void test_mm512_mask_i64scatter_pd(void *__addr, __mmask8 __mask, __m512i __index, __m512d __v1) {
386+
// CIR-LABEL: test_mm512_mask_i64scatter_pd
387+
// CIR: cir.llvm.intrinsic "x86.avx512.mask.scatter.qpd.512"
388+
389+
// LLVM-LABEL: test_mm512_mask_i64scatter_pd
390+
// LLVM: @llvm.x86.avx512.mask.scatter.qpd.512
391+
return _mm512_mask_i64scatter_pd(__addr, __mask, __index, __v1, 2);
392+
}
393+
394+
void test_mm512_i64scatter_ps(void *__addr, __m512i __index, __m256 __v1) {
395+
// CIR-LABEL: test_mm512_i64scatter_ps
396+
// CIR: cir.llvm.intrinsic "x86.avx512.mask.scatter.qps.512"
397+
398+
// LLVM-LABEL: test_mm512_i64scatter_ps
399+
// LLVM: @llvm.x86.avx512.mask.scatter.qps.512
400+
return _mm512_i64scatter_ps(__addr, __index, __v1, 2);
401+
}
402+
403+
void test_mm512_mask_i64scatter_ps(void *__addr, __mmask8 __mask, __m512i __index, __m256 __v1) {
404+
// CIR-LABEL: test_mm512_mask_i64scatter_ps
405+
// CIR: cir.llvm.intrinsic "x86.avx512.mask.scatter.qps.512"
406+
407+
// LLVM-LABEL: test_mm512_mask_i64scatter_ps
408+
// LLVM: @llvm.x86.avx512.mask.scatter.qps.512
409+
return _mm512_mask_i64scatter_ps(__addr, __mask, __index, __v1, 2);
410+
}
411+
412+
void test_mm512_i32scatter_epi32(void *__addr, __m512i __index, __m512i __v1) {
413+
// CIR-LABEL: test_mm512_i32scatter_epi32
414+
// CIR: cir.llvm.intrinsic "x86.avx512.mask.scatter.dpi.512"
415+
416+
// LLVM-LABEL: test_mm512_i32scatter_epi32
417+
// LLVM: @llvm.x86.avx512.mask.scatter.dpi.512
418+
return _mm512_i32scatter_epi32(__addr, __index, __v1, 2);
419+
}
420+
421+
void test_mm512_mask_i32scatter_epi32(void *__addr, __mmask16 __mask, __m512i __index, __m512i __v1) {
422+
// CIR-LABEL: test_mm512_mask_i32scatter_epi32
423+
// CIR: cir.llvm.intrinsic "x86.avx512.mask.scatter.dpi.512"
424+
425+
// LLVM-LABEL: test_mm512_mask_i32scatter_epi32
426+
// LLVM: @llvm.x86.avx512.mask.scatter.dpi.512
427+
return _mm512_mask_i32scatter_epi32(__addr, __mask, __index, __v1, 2);
428+
}
429+
430+
void test_mm512_i64scatter_epi64(void *__addr, __m512i __index, __m512i __v1) {
431+
// CIR-LABEL: test_mm512_i64scatter_epi64
432+
// CIR: cir.llvm.intrinsic "x86.avx512.mask.scatter.qpq.512"
433+
434+
// LLVM-LABEL: test_mm512_i64scatter_epi64
435+
// LLVM: @llvm.x86.avx512.mask.scatter.qpq.512
436+
return _mm512_i64scatter_epi64(__addr, __index, __v1, 2);
437+
}
438+
439+
void test_mm512_mask_i64scatter_epi64(void *__addr, __mmask8 __mask, __m512i __index, __m512i __v1) {
440+
// CIR-LABEL: test_mm512_mask_i64scatter_epi64
441+
// CIR: cir.llvm.intrinsic "x86.avx512.mask.scatter.qpq.512"
442+
443+
// LLVM-LABEL: test_mm512_mask_i64scatter_epi64
444+
// LLVM: @llvm.x86.avx512.mask.scatter.qpq.512
445+
return _mm512_mask_i64scatter_epi64(__addr, __mask, __index, __v1, 2);
446+
}
447+
448+
void test_mm512_i64scatter_epi32(void *__addr, __m512i __index, __m256i __v1) {
449+
// CIR-LABEL: test_mm512_i64scatter_epi32
450+
// CIR: cir.llvm.intrinsic "x86.avx512.mask.scatter.qpi.512"
451+
452+
// LLVM-LABEL: test_mm512_i64scatter_epi32
453+
// LLVM: @llvm.x86.avx512.mask.scatter.qpi.512
454+
return _mm512_i64scatter_epi32(__addr, __index, __v1, 2);
455+
}
456+
457+
void test_mm512_mask_i64scatter_epi32(void *__addr, __mmask8 __mask, __m512i __index, __m256i __v1) {
458+
// CIR-LABEL: test_mm512_mask_i64scatter_epi32
459+
// CIR: cir.llvm.intrinsic "x86.avx512.mask.scatter.qpi.512"
460+
461+
// LLVM-LABEL: test_mm512_mask_i64scatter_epi32
462+
// LLVM: @llvm.x86.avx512.mask.scatter.qpi.512
463+
return _mm512_mask_i64scatter_epi32(__addr, __mask, __index, __v1, 2);
464+
}

0 commit comments

Comments
 (0)