diff --git a/llvm/test/Assembler/scalable-vector-struct.ll b/llvm/test/Assembler/scalable-vector-struct.ll
index d0b49b11b7fad..a16048335dbf8 100644
--- a/llvm/test/Assembler/scalable-vector-struct.ll
+++ b/llvm/test/Assembler/scalable-vector-struct.ll
@@ -1,23 +1,21 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
-; ifndef INTEL_SYCL_OPAQUEPOINTER_READY
 ; RUN: opt -passes=verify -S < %s 2>&1 | FileCheck %s
-; endif

 %struct.test = type { , }

-define @load(%struct.test* %x) {
+define @load(ptr %x) {
 ; CHECK-LABEL: define @load
 ; CHECK-SAME: (ptr [[X:%.*]]) {
 ; CHECK-NEXT:    [[A:%.*]] = load [[STRUCT_TEST:%.*]], ptr [[X]], align 4
 ; CHECK-NEXT:    [[B:%.*]] = extractvalue [[STRUCT_TEST]] [[A]], 1
 ; CHECK-NEXT:    ret [[B]]
 ;
-  %a = load %struct.test, %struct.test* %x
+  %a = load %struct.test, ptr %x
   %b = extractvalue %struct.test %a, 1
   ret %b
 }

-define void @store(%struct.test* %x, %y, %z) {
+define void @store(ptr %x, %y, %z) {
 ; CHECK-LABEL: define void @store
 ; CHECK-SAME: (ptr [[X:%.*]], [[Y:%.*]], [[Z:%.*]]) {
 ; CHECK-NEXT:    [[A:%.*]] = insertvalue [[STRUCT_TEST:%.*]] undef, [[Y]], 0
@@ -27,6 +25,6 @@ define void @store(%struct.test* %x, %y, %
 ;
   %a = insertvalue %struct.test undef, %y, 0
   %b = insertvalue %struct.test %a, %z, 1
-  store %struct.test %b, %struct.test* %x
+  store %struct.test %b, ptr %x
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/local-accessor-to-shared-memory-triple.ll b/llvm/test/CodeGen/AMDGPU/local-accessor-to-shared-memory-triple.ll
index 9b1d5305a7696..1e094bbc91097 100644
--- a/llvm/test/CodeGen/AMDGPU/local-accessor-to-shared-memory-triple.ll
+++ b/llvm/test/CodeGen/AMDGPU/local-accessor-to-shared-memory-triple.ll
@@ -1,9 +1,7 @@
 ; This test checks that the Local Accessor to Shared Memory pass runs with the
 ; `amdgcn-amd-amdhsa` triple, but not with `amdgcn-amd-amdpal`.
-; ifndef INTEL_SYCL_OPAQUEPOINTER_READY
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa < %s | FileCheck --check-prefix=CHECK-VALID %s
 ; RUN: llc -mtriple=amdgcn-amd-amdpal < %s | FileCheck --check-prefix=CHECK-INVALID %s
-; end

 ; ModuleID = 'local-accessor-to-shared-memory-triple.ll'
 source_filename = "local-accessor-to-shared-memory-triple.ll"
diff --git a/llvm/test/CodeGen/AMDGPU/local-accessor-to-shared-memory-valid-triple.ll b/llvm/test/CodeGen/AMDGPU/local-accessor-to-shared-memory-valid-triple.ll
index e47851320ff1c..3f00cd23d6475 100644
--- a/llvm/test/CodeGen/AMDGPU/local-accessor-to-shared-memory-valid-triple.ll
+++ b/llvm/test/CodeGen/AMDGPU/local-accessor-to-shared-memory-valid-triple.ll
@@ -1,9 +1,7 @@
 ; This test checks that the Local Accessor to Shared Memory pass runs with the
 ; `amdgcn-amd-amdhsa` triple and does not if the option is not present.
-; ifndef INTEL_SYCL_OPAQUEPOINTER_READY ; RUN: llc -mtriple=amdgcn-amd-amdhsa < %s | FileCheck --check-prefix=CHECK-OPT %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa < %s | FileCheck --check-prefix=CHECK-OPT %s -; end ; ModuleID = 'local-accessor-to-shared-memory-valid-triple.ll' source_filename = "local-accessor-to-shared-memory-valid-triple.ll" diff --git a/llvm/test/CodeGen/X86/avx512-broadcast-unfold.ll b/llvm/test/CodeGen/X86/avx512-broadcast-unfold.ll index d013858df0cbe..5fc89702456cf 100644 --- a/llvm/test/CodeGen/X86/avx512-broadcast-unfold.ll +++ b/llvm/test/CodeGen/X86/avx512-broadcast-unfold.ll @@ -1,12 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; ifndef INTEL_SYCL_OPAQUEPOINTER_READY ; RUN: llc < %s -mtriple=x86_64-pc-linux -mattr=avx512vl | FileCheck %s -; end ; Test that we can unfold constant pool loads when we're using avx512's ; ability to fold a broadcast load into an operation. -define void @bcast_unfold_add_v16i32(i32* %arg) { +define void @bcast_unfold_add_v16i32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_add_v16i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -26,12 +24,12 @@ bb: bb2: ; preds = %bb2, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb2 ] - %tmp3 = getelementptr inbounds i32, i32* %arg, i64 %tmp - %tmp4 = bitcast i32* %tmp3 to <16 x i32>* - %tmp5 = load <16 x i32>, <16 x i32>* %tmp4, align 4 + %tmp3 = getelementptr inbounds i32, ptr %arg, i64 %tmp + %tmp4 = bitcast ptr %tmp3 to ptr + %tmp5 = load <16 x i32>, ptr %tmp4, align 4 %tmp6 = add nsw <16 x i32> %tmp5, - %tmp7 = bitcast i32* %tmp3 to <16 x i32>* - store <16 x i32> %tmp6, <16 x i32>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp3 to ptr + store <16 x i32> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 16 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb2 @@ -40,7 +38,7 @@ bb10: ; preds = %bb2 ret void } -define void @bcast_unfold_add_v8i32(i32* %arg) { +define void @bcast_unfold_add_v8i32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_add_v8i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -60,12 +58,12 @@ bb: bb2: ; preds = %bb2, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb2 ] - %tmp3 = getelementptr inbounds i32, i32* %arg, i64 %tmp - %tmp4 = bitcast i32* %tmp3 to <8 x i32>* - %tmp5 = load <8 x i32>, <8 x i32>* %tmp4, align 4 + %tmp3 = getelementptr inbounds i32, ptr %arg, i64 %tmp + %tmp4 = bitcast ptr %tmp3 to ptr + %tmp5 = load <8 x i32>, ptr %tmp4, align 4 %tmp6 = add nsw <8 x i32> %tmp5, - %tmp7 = bitcast i32* %tmp3 to <8 x i32>* - store <8 x i32> %tmp6, <8 x i32>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp3 to ptr + store <8 x i32> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 8 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb2 @@ -74,7 +72,7 @@ bb10: ; preds = %bb2 ret void } -define void @bcast_unfold_add_v4i32(i32* %arg) { +define void @bcast_unfold_add_v4i32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_add_v4i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -93,12 +91,12 @@ bb: bb2: ; preds = %bb2, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb2 ] - %tmp3 = getelementptr inbounds i32, i32* %arg, i64 %tmp - %tmp4 = bitcast i32* %tmp3 to <4 x i32>* - %tmp5 = load <4 x i32>, <4 x i32>* %tmp4, align 4 + %tmp3 = getelementptr inbounds i32, ptr %arg, i64 %tmp + %tmp4 = bitcast ptr %tmp3 to ptr + %tmp5 = load <4 x i32>, ptr %tmp4, align 4 %tmp6 = add nsw <4 x i32> %tmp5, - %tmp7 = bitcast i32* %tmp3 to <4 x i32>* - store <4 x i32> %tmp6, <4 x i32>* %tmp7, 
align 4 + %tmp7 = bitcast ptr %tmp3 to ptr + store <4 x i32> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 4 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb2 @@ -107,7 +105,7 @@ bb10: ; preds = %bb2 ret void } -define void @bcast_unfold_add_v8i64(i64* %arg) { +define void @bcast_unfold_add_v8i64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_add_v8i64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -127,12 +125,12 @@ bb: bb2: ; preds = %bb2, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb2 ] - %tmp3 = getelementptr inbounds i64, i64* %arg, i64 %tmp - %tmp4 = bitcast i64* %tmp3 to <8 x i64>* - %tmp5 = load <8 x i64>, <8 x i64>* %tmp4, align 8 + %tmp3 = getelementptr inbounds i64, ptr %arg, i64 %tmp + %tmp4 = bitcast ptr %tmp3 to ptr + %tmp5 = load <8 x i64>, ptr %tmp4, align 8 %tmp6 = add nsw <8 x i64> %tmp5, - %tmp7 = bitcast i64* %tmp3 to <8 x i64>* - store <8 x i64> %tmp6, <8 x i64>* %tmp7, align 8 + %tmp7 = bitcast ptr %tmp3 to ptr + store <8 x i64> %tmp6, ptr %tmp7, align 8 %tmp8 = add i64 %tmp, 8 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb2 @@ -141,7 +139,7 @@ bb10: ; preds = %bb2 ret void } -define void @bcast_unfold_add_v4i64(i64* %arg) { +define void @bcast_unfold_add_v4i64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_add_v4i64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -161,12 +159,12 @@ bb: bb2: ; preds = %bb2, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb2 ] - %tmp3 = getelementptr inbounds i64, i64* %arg, i64 %tmp - %tmp4 = bitcast i64* %tmp3 to <4 x i64>* - %tmp5 = load <4 x i64>, <4 x i64>* %tmp4, align 8 + %tmp3 = getelementptr inbounds i64, ptr %arg, i64 %tmp + %tmp4 = bitcast ptr %tmp3 to ptr + %tmp5 = load <4 x i64>, ptr %tmp4, align 8 %tmp6 = add nsw <4 x i64> %tmp5, - %tmp7 = bitcast i64* %tmp3 to <4 x i64>* - store <4 x i64> %tmp6, <4 x i64>* %tmp7, align 8 + %tmp7 = bitcast ptr %tmp3 to ptr + store <4 x i64> %tmp6, ptr %tmp7, align 8 %tmp8 = add i64 %tmp, 4 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb2 @@ -175,7 +173,7 @@ bb10: ; preds = %bb2 ret void } -define void @bcast_unfold_add_v2i64(i64* %arg) { +define void @bcast_unfold_add_v2i64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_add_v2i64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -194,12 +192,12 @@ bb: bb2: ; preds = %bb2, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb2 ] - %tmp3 = getelementptr inbounds i64, i64* %arg, i64 %tmp - %tmp4 = bitcast i64* %tmp3 to <2 x i64>* - %tmp5 = load <2 x i64>, <2 x i64>* %tmp4, align 8 + %tmp3 = getelementptr inbounds i64, ptr %arg, i64 %tmp + %tmp4 = bitcast ptr %tmp3 to ptr + %tmp5 = load <2 x i64>, ptr %tmp4, align 8 %tmp6 = add nsw <2 x i64> %tmp5, - %tmp7 = bitcast i64* %tmp3 to <2 x i64>* - store <2 x i64> %tmp6, <2 x i64>* %tmp7, align 8 + %tmp7 = bitcast ptr %tmp3 to ptr + store <2 x i64> %tmp6, ptr %tmp7, align 8 %tmp8 = add i64 %tmp, 2 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb2 @@ -208,7 +206,7 @@ bb10: ; preds = %bb2 ret void } -define void @bcast_unfold_mul_v16i32(i32* %arg) { +define void @bcast_unfold_mul_v16i32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_mul_v16i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -229,12 +227,12 @@ bb: bb2: ; preds = %bb2, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb2 ] - %tmp3 = getelementptr inbounds i32, i32* %arg, i64 %tmp - %tmp4 = bitcast i32* %tmp3 to <16 x i32>* - %tmp5 = load <16 x i32>, <16 x i32>* %tmp4, align 4 + %tmp3 = getelementptr 
inbounds i32, ptr %arg, i64 %tmp + %tmp4 = bitcast ptr %tmp3 to ptr + %tmp5 = load <16 x i32>, ptr %tmp4, align 4 %tmp6 = mul nsw <16 x i32> %tmp5, - %tmp7 = bitcast i32* %tmp3 to <16 x i32>* - store <16 x i32> %tmp6, <16 x i32>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp3 to ptr + store <16 x i32> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 16 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb2 @@ -243,7 +241,7 @@ bb10: ; preds = %bb2 ret void } -define void @bcast_unfold_mul_v8i32(i32* %arg) { +define void @bcast_unfold_mul_v8i32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_mul_v8i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -264,12 +262,12 @@ bb: bb2: ; preds = %bb2, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb2 ] - %tmp3 = getelementptr inbounds i32, i32* %arg, i64 %tmp - %tmp4 = bitcast i32* %tmp3 to <8 x i32>* - %tmp5 = load <8 x i32>, <8 x i32>* %tmp4, align 4 + %tmp3 = getelementptr inbounds i32, ptr %arg, i64 %tmp + %tmp4 = bitcast ptr %tmp3 to ptr + %tmp5 = load <8 x i32>, ptr %tmp4, align 4 %tmp6 = mul nsw <8 x i32> %tmp5, - %tmp7 = bitcast i32* %tmp3 to <8 x i32>* - store <8 x i32> %tmp6, <8 x i32>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp3 to ptr + store <8 x i32> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 8 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb2 @@ -278,7 +276,7 @@ bb10: ; preds = %bb2 ret void } -define void @bcast_unfold_mul_v4i32(i32* %arg) { +define void @bcast_unfold_mul_v4i32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_mul_v4i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -298,12 +296,12 @@ bb: bb2: ; preds = %bb2, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb2 ] - %tmp3 = getelementptr inbounds i32, i32* %arg, i64 %tmp - %tmp4 = bitcast i32* %tmp3 to <4 x i32>* - %tmp5 = load <4 x i32>, <4 x i32>* %tmp4, align 4 + %tmp3 = getelementptr inbounds i32, ptr %arg, i64 %tmp + %tmp4 = bitcast ptr %tmp3 to ptr + %tmp5 = load <4 x i32>, ptr %tmp4, align 4 %tmp6 = mul nsw <4 x i32> %tmp5, - %tmp7 = bitcast i32* %tmp3 to <4 x i32>* - store <4 x i32> %tmp6, <4 x i32>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp3 to ptr + store <4 x i32> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 4 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb2 @@ -312,7 +310,7 @@ bb10: ; preds = %bb2 ret void } -define void @bcast_unfold_mul_v8i64(i64* %arg) { +define void @bcast_unfold_mul_v8i64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_mul_v8i64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -333,12 +331,12 @@ bb: bb2: ; preds = %bb2, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb2 ] - %tmp3 = getelementptr inbounds i64, i64* %arg, i64 %tmp - %tmp4 = bitcast i64* %tmp3 to <8 x i64>* - %tmp5 = load <8 x i64>, <8 x i64>* %tmp4, align 8 + %tmp3 = getelementptr inbounds i64, ptr %arg, i64 %tmp + %tmp4 = bitcast ptr %tmp3 to ptr + %tmp5 = load <8 x i64>, ptr %tmp4, align 8 %tmp6 = mul nsw <8 x i64> %tmp5, - %tmp7 = bitcast i64* %tmp3 to <8 x i64>* - store <8 x i64> %tmp6, <8 x i64>* %tmp7, align 8 + %tmp7 = bitcast ptr %tmp3 to ptr + store <8 x i64> %tmp6, ptr %tmp7, align 8 %tmp8 = add i64 %tmp, 8 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb2 @@ -347,7 +345,7 @@ bb10: ; preds = %bb2 ret void } -define void @bcast_unfold_mul_v4i64(i64* %arg) { +define void @bcast_unfold_mul_v4i64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_mul_v4i64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -368,12 +366,12 @@ bb: bb2: ; preds = 
%bb2, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb2 ] - %tmp3 = getelementptr inbounds i64, i64* %arg, i64 %tmp - %tmp4 = bitcast i64* %tmp3 to <4 x i64>* - %tmp5 = load <4 x i64>, <4 x i64>* %tmp4, align 8 + %tmp3 = getelementptr inbounds i64, ptr %arg, i64 %tmp + %tmp4 = bitcast ptr %tmp3 to ptr + %tmp5 = load <4 x i64>, ptr %tmp4, align 8 %tmp6 = mul nsw <4 x i64> %tmp5, - %tmp7 = bitcast i64* %tmp3 to <4 x i64>* - store <4 x i64> %tmp6, <4 x i64>* %tmp7, align 8 + %tmp7 = bitcast ptr %tmp3 to ptr + store <4 x i64> %tmp6, ptr %tmp7, align 8 %tmp8 = add i64 %tmp, 4 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb2 @@ -382,7 +380,7 @@ bb10: ; preds = %bb2 ret void } -define void @bcast_unfold_mul_v2i64(i64* %arg) { +define void @bcast_unfold_mul_v2i64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_mul_v2i64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -402,12 +400,12 @@ bb: bb2: ; preds = %bb2, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb2 ] - %tmp3 = getelementptr inbounds i64, i64* %arg, i64 %tmp - %tmp4 = bitcast i64* %tmp3 to <2 x i64>* - %tmp5 = load <2 x i64>, <2 x i64>* %tmp4, align 8 + %tmp3 = getelementptr inbounds i64, ptr %arg, i64 %tmp + %tmp4 = bitcast ptr %tmp3 to ptr + %tmp5 = load <2 x i64>, ptr %tmp4, align 8 %tmp6 = mul nsw <2 x i64> %tmp5, - %tmp7 = bitcast i64* %tmp3 to <2 x i64>* - store <2 x i64> %tmp6, <2 x i64>* %tmp7, align 8 + %tmp7 = bitcast ptr %tmp3 to ptr + store <2 x i64> %tmp6, ptr %tmp7, align 8 %tmp8 = add i64 %tmp, 2 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb2 @@ -416,7 +414,7 @@ bb10: ; preds = %bb2 ret void } -define void @bcast_unfold_or_v16i32(i32* %arg) { +define void @bcast_unfold_or_v16i32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_or_v16i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -436,12 +434,12 @@ bb: bb2: ; preds = %bb2, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb2 ] - %tmp3 = getelementptr inbounds i32, i32* %arg, i64 %tmp - %tmp4 = bitcast i32* %tmp3 to <16 x i32>* - %tmp5 = load <16 x i32>, <16 x i32>* %tmp4, align 4 + %tmp3 = getelementptr inbounds i32, ptr %arg, i64 %tmp + %tmp4 = bitcast ptr %tmp3 to ptr + %tmp5 = load <16 x i32>, ptr %tmp4, align 4 %tmp6 = or <16 x i32> %tmp5, - %tmp7 = bitcast i32* %tmp3 to <16 x i32>* - store <16 x i32> %tmp6, <16 x i32>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp3 to ptr + store <16 x i32> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 16 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb2 @@ -450,7 +448,7 @@ bb10: ; preds = %bb2 ret void } -define void @bcast_unfold_or_v8i32(i32* %arg) { +define void @bcast_unfold_or_v8i32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_or_v8i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -470,12 +468,12 @@ bb: bb2: ; preds = %bb2, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb2 ] - %tmp3 = getelementptr inbounds i32, i32* %arg, i64 %tmp - %tmp4 = bitcast i32* %tmp3 to <8 x i32>* - %tmp5 = load <8 x i32>, <8 x i32>* %tmp4, align 4 + %tmp3 = getelementptr inbounds i32, ptr %arg, i64 %tmp + %tmp4 = bitcast ptr %tmp3 to ptr + %tmp5 = load <8 x i32>, ptr %tmp4, align 4 %tmp6 = or <8 x i32> %tmp5, - %tmp7 = bitcast i32* %tmp3 to <8 x i32>* - store <8 x i32> %tmp6, <8 x i32>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp3 to ptr + store <8 x i32> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 8 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb2 @@ -484,7 +482,7 @@ bb10: ; preds = %bb2 ret void } -define void 
@bcast_unfold_or_v4i32(i32* %arg) { +define void @bcast_unfold_or_v4i32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_or_v4i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -503,12 +501,12 @@ bb: bb2: ; preds = %bb2, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb2 ] - %tmp3 = getelementptr inbounds i32, i32* %arg, i64 %tmp - %tmp4 = bitcast i32* %tmp3 to <4 x i32>* - %tmp5 = load <4 x i32>, <4 x i32>* %tmp4, align 4 + %tmp3 = getelementptr inbounds i32, ptr %arg, i64 %tmp + %tmp4 = bitcast ptr %tmp3 to ptr + %tmp5 = load <4 x i32>, ptr %tmp4, align 4 %tmp6 = or <4 x i32> %tmp5, - %tmp7 = bitcast i32* %tmp3 to <4 x i32>* - store <4 x i32> %tmp6, <4 x i32>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp3 to ptr + store <4 x i32> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 4 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb2 @@ -517,7 +515,7 @@ bb10: ; preds = %bb2 ret void } -define void @bcast_unfold_or_v8i64(i64* %arg) { +define void @bcast_unfold_or_v8i64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_or_v8i64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -537,12 +535,12 @@ bb: bb2: ; preds = %bb2, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb2 ] - %tmp3 = getelementptr inbounds i64, i64* %arg, i64 %tmp - %tmp4 = bitcast i64* %tmp3 to <8 x i64>* - %tmp5 = load <8 x i64>, <8 x i64>* %tmp4, align 8 + %tmp3 = getelementptr inbounds i64, ptr %arg, i64 %tmp + %tmp4 = bitcast ptr %tmp3 to ptr + %tmp5 = load <8 x i64>, ptr %tmp4, align 8 %tmp6 = or <8 x i64> %tmp5, - %tmp7 = bitcast i64* %tmp3 to <8 x i64>* - store <8 x i64> %tmp6, <8 x i64>* %tmp7, align 8 + %tmp7 = bitcast ptr %tmp3 to ptr + store <8 x i64> %tmp6, ptr %tmp7, align 8 %tmp8 = add i64 %tmp, 8 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb2 @@ -551,7 +549,7 @@ bb10: ; preds = %bb2 ret void } -define void @bcast_unfold_or_v4i64(i64* %arg) { +define void @bcast_unfold_or_v4i64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_or_v4i64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -571,12 +569,12 @@ bb: bb2: ; preds = %bb2, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb2 ] - %tmp3 = getelementptr inbounds i64, i64* %arg, i64 %tmp - %tmp4 = bitcast i64* %tmp3 to <4 x i64>* - %tmp5 = load <4 x i64>, <4 x i64>* %tmp4, align 8 + %tmp3 = getelementptr inbounds i64, ptr %arg, i64 %tmp + %tmp4 = bitcast ptr %tmp3 to ptr + %tmp5 = load <4 x i64>, ptr %tmp4, align 8 %tmp6 = or <4 x i64> %tmp5, - %tmp7 = bitcast i64* %tmp3 to <4 x i64>* - store <4 x i64> %tmp6, <4 x i64>* %tmp7, align 8 + %tmp7 = bitcast ptr %tmp3 to ptr + store <4 x i64> %tmp6, ptr %tmp7, align 8 %tmp8 = add i64 %tmp, 4 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb2 @@ -585,7 +583,7 @@ bb10: ; preds = %bb2 ret void } -define void @bcast_unfold_or_v2i64(i64* %arg) { +define void @bcast_unfold_or_v2i64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_or_v2i64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -605,12 +603,12 @@ bb: bb2: ; preds = %bb2, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb2 ] - %tmp3 = getelementptr inbounds i64, i64* %arg, i64 %tmp - %tmp4 = bitcast i64* %tmp3 to <2 x i64>* - %tmp5 = load <2 x i64>, <2 x i64>* %tmp4, align 8 + %tmp3 = getelementptr inbounds i64, ptr %arg, i64 %tmp + %tmp4 = bitcast ptr %tmp3 to ptr + %tmp5 = load <2 x i64>, ptr %tmp4, align 8 %tmp6 = or <2 x i64> %tmp5, - %tmp7 = bitcast i64* %tmp3 to <2 x i64>* - store <2 x i64> %tmp6, <2 x i64>* %tmp7, align 8 + %tmp7 = bitcast ptr %tmp3 to ptr + store <2 x 
i64> %tmp6, ptr %tmp7, align 8 %tmp8 = add i64 %tmp, 2 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb2 @@ -619,7 +617,7 @@ bb10: ; preds = %bb2 ret void } -define void @bcast_unfold_fneg_v16f32(float* %arg) { +define void @bcast_unfold_fneg_v16f32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_fneg_v16f32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -639,12 +637,12 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ] - %tmp2 = getelementptr inbounds float, float* %arg, i64 %tmp - %tmp3 = bitcast float* %tmp2 to <16 x float>* - %tmp4 = load <16 x float>, <16 x float>* %tmp3, align 4 + %tmp2 = getelementptr inbounds float, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <16 x float>, ptr %tmp3, align 4 %tmp5 = fneg <16 x float> %tmp4 - %tmp6 = bitcast float* %tmp2 to <16 x float>* - store <16 x float> %tmp5, <16 x float>* %tmp6, align 4 + %tmp6 = bitcast ptr %tmp2 to ptr + store <16 x float> %tmp5, ptr %tmp6, align 4 %tmp7 = add i64 %tmp, 16 %tmp8 = icmp eq i64 %tmp7, 1024 br i1 %tmp8, label %bb9, label %bb1 @@ -653,7 +651,7 @@ bb9: ; preds = %bb1 ret void } -define void @bcast_unfold_fneg_v8f32(float* %arg) { +define void @bcast_unfold_fneg_v8f32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_fneg_v8f32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -673,12 +671,12 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ] - %tmp2 = getelementptr inbounds float, float* %arg, i64 %tmp - %tmp3 = bitcast float* %tmp2 to <8 x float>* - %tmp4 = load <8 x float>, <8 x float>* %tmp3, align 4 + %tmp2 = getelementptr inbounds float, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <8 x float>, ptr %tmp3, align 4 %tmp5 = fneg <8 x float> %tmp4 - %tmp6 = bitcast float* %tmp2 to <8 x float>* - store <8 x float> %tmp5, <8 x float>* %tmp6, align 4 + %tmp6 = bitcast ptr %tmp2 to ptr + store <8 x float> %tmp5, ptr %tmp6, align 4 %tmp7 = add i64 %tmp, 8 %tmp8 = icmp eq i64 %tmp7, 1024 br i1 %tmp8, label %bb9, label %bb1 @@ -687,7 +685,7 @@ bb9: ; preds = %bb1 ret void } -define void @bcast_unfold_fneg_v4f32(float* %arg) { +define void @bcast_unfold_fneg_v4f32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_fneg_v4f32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -706,12 +704,12 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ] - %tmp2 = getelementptr inbounds float, float* %arg, i64 %tmp - %tmp3 = bitcast float* %tmp2 to <4 x float>* - %tmp4 = load <4 x float>, <4 x float>* %tmp3, align 4 + %tmp2 = getelementptr inbounds float, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <4 x float>, ptr %tmp3, align 4 %tmp5 = fneg <4 x float> %tmp4 - %tmp6 = bitcast float* %tmp2 to <4 x float>* - store <4 x float> %tmp5, <4 x float>* %tmp6, align 4 + %tmp6 = bitcast ptr %tmp2 to ptr + store <4 x float> %tmp5, ptr %tmp6, align 4 %tmp7 = add i64 %tmp, 4 %tmp8 = icmp eq i64 %tmp7, 1024 br i1 %tmp8, label %bb9, label %bb1 @@ -720,7 +718,7 @@ bb9: ; preds = %bb1 ret void } -define void @bcast_unfold_fneg_v8f64(double* %arg) { +define void @bcast_unfold_fneg_v8f64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_fneg_v8f64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -740,12 +738,12 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ] - %tmp2 = getelementptr inbounds double, double* %arg, i64 %tmp - %tmp3 = bitcast double* %tmp2 to <8 x double>* - %tmp4 = load <8 x double>, 
<8 x double>* %tmp3, align 8 + %tmp2 = getelementptr inbounds double, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <8 x double>, ptr %tmp3, align 8 %tmp5 = fneg <8 x double> %tmp4 - %tmp6 = bitcast double* %tmp2 to <8 x double>* - store <8 x double> %tmp5, <8 x double>* %tmp6, align 8 + %tmp6 = bitcast ptr %tmp2 to ptr + store <8 x double> %tmp5, ptr %tmp6, align 8 %tmp7 = add i64 %tmp, 8 %tmp8 = icmp eq i64 %tmp7, 1024 br i1 %tmp8, label %bb9, label %bb1 @@ -754,7 +752,7 @@ bb9: ; preds = %bb1 ret void } -define void @bcast_unfold_fneg_v4f64(double* %arg) { +define void @bcast_unfold_fneg_v4f64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_fneg_v4f64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -774,12 +772,12 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ] - %tmp2 = getelementptr inbounds double, double* %arg, i64 %tmp - %tmp3 = bitcast double* %tmp2 to <4 x double>* - %tmp4 = load <4 x double>, <4 x double>* %tmp3, align 8 + %tmp2 = getelementptr inbounds double, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <4 x double>, ptr %tmp3, align 8 %tmp5 = fneg <4 x double> %tmp4 - %tmp6 = bitcast double* %tmp2 to <4 x double>* - store <4 x double> %tmp5, <4 x double>* %tmp6, align 8 + %tmp6 = bitcast ptr %tmp2 to ptr + store <4 x double> %tmp5, ptr %tmp6, align 8 %tmp7 = add i64 %tmp, 4 %tmp8 = icmp eq i64 %tmp7, 1024 br i1 %tmp8, label %bb9, label %bb1 @@ -788,7 +786,7 @@ bb9: ; preds = %bb1 ret void } -define void @bcast_unfold_fneg_v2f64(double* %arg) { +define void @bcast_unfold_fneg_v2f64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_fneg_v2f64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -808,12 +806,12 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ] - %tmp2 = getelementptr inbounds double, double* %arg, i64 %tmp - %tmp3 = bitcast double* %tmp2 to <2 x double>* - %tmp4 = load <2 x double>, <2 x double>* %tmp3, align 8 + %tmp2 = getelementptr inbounds double, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <2 x double>, ptr %tmp3, align 8 %tmp5 = fneg <2 x double> %tmp4 - %tmp6 = bitcast double* %tmp2 to <2 x double>* - store <2 x double> %tmp5, <2 x double>* %tmp6, align 8 + %tmp6 = bitcast ptr %tmp2 to ptr + store <2 x double> %tmp5, ptr %tmp6, align 8 %tmp7 = add i64 %tmp, 2 %tmp8 = icmp eq i64 %tmp7, 1024 br i1 %tmp8, label %bb9, label %bb1 @@ -822,7 +820,7 @@ bb9: ; preds = %bb1 ret void } -define void @bcast_unfold_fabs_v16f32(float* %arg) { +define void @bcast_unfold_fabs_v16f32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_fabs_v16f32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -842,12 +840,12 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ] - %tmp2 = getelementptr inbounds float, float* %arg, i64 %tmp - %tmp3 = bitcast float* %tmp2 to <16 x float>* - %tmp4 = load <16 x float>, <16 x float>* %tmp3, align 4 + %tmp2 = getelementptr inbounds float, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <16 x float>, ptr %tmp3, align 4 %tmp5 = call <16 x float> @llvm.fabs.v16f32(<16 x float> %tmp4) - %tmp6 = bitcast float* %tmp2 to <16 x float>* - store <16 x float> %tmp5, <16 x float>* %tmp6, align 4 + %tmp6 = bitcast ptr %tmp2 to ptr + store <16 x float> %tmp5, ptr %tmp6, align 4 %tmp7 = add i64 %tmp, 16 %tmp8 = icmp eq i64 %tmp7, 1024 br i1 %tmp8, label %bb9, label %bb1 @@ -859,7 +857,7 @@ bb9: ; preds = %bb1 ; Function Attrs: nounwind readnone 
speculatable willreturn declare <16 x float> @llvm.fabs.v16f32(<16 x float>) #0 -define void @bcast_unfold_fabs_v8f32(float* %arg) { +define void @bcast_unfold_fabs_v8f32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_fabs_v8f32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -879,12 +877,12 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ] - %tmp2 = getelementptr inbounds float, float* %arg, i64 %tmp - %tmp3 = bitcast float* %tmp2 to <8 x float>* - %tmp4 = load <8 x float>, <8 x float>* %tmp3, align 4 + %tmp2 = getelementptr inbounds float, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <8 x float>, ptr %tmp3, align 4 %tmp5 = call <8 x float> @llvm.fabs.v8f32(<8 x float> %tmp4) - %tmp6 = bitcast float* %tmp2 to <8 x float>* - store <8 x float> %tmp5, <8 x float>* %tmp6, align 4 + %tmp6 = bitcast ptr %tmp2 to ptr + store <8 x float> %tmp5, ptr %tmp6, align 4 %tmp7 = add i64 %tmp, 8 %tmp8 = icmp eq i64 %tmp7, 1024 br i1 %tmp8, label %bb9, label %bb1 @@ -896,7 +894,7 @@ bb9: ; preds = %bb1 ; Function Attrs: nounwind readnone speculatable willreturn declare <8 x float> @llvm.fabs.v8f32(<8 x float>) #0 -define void @bcast_unfold_fabs_v4f32(float* %arg) { +define void @bcast_unfold_fabs_v4f32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_fabs_v4f32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -915,12 +913,12 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ] - %tmp2 = getelementptr inbounds float, float* %arg, i64 %tmp - %tmp3 = bitcast float* %tmp2 to <4 x float>* - %tmp4 = load <4 x float>, <4 x float>* %tmp3, align 4 + %tmp2 = getelementptr inbounds float, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <4 x float>, ptr %tmp3, align 4 %tmp5 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %tmp4) - %tmp6 = bitcast float* %tmp2 to <4 x float>* - store <4 x float> %tmp5, <4 x float>* %tmp6, align 4 + %tmp6 = bitcast ptr %tmp2 to ptr + store <4 x float> %tmp5, ptr %tmp6, align 4 %tmp7 = add i64 %tmp, 4 %tmp8 = icmp eq i64 %tmp7, 1024 br i1 %tmp8, label %bb9, label %bb1 @@ -932,7 +930,7 @@ bb9: ; preds = %bb1 ; Function Attrs: nounwind readnone speculatable willreturn declare <4 x float> @llvm.fabs.v4f32(<4 x float>) #0 -define void @bcast_unfold_fabs_v8f64(double* %arg) { +define void @bcast_unfold_fabs_v8f64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_fabs_v8f64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -952,12 +950,12 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ] - %tmp2 = getelementptr inbounds double, double* %arg, i64 %tmp - %tmp3 = bitcast double* %tmp2 to <8 x double>* - %tmp4 = load <8 x double>, <8 x double>* %tmp3, align 8 + %tmp2 = getelementptr inbounds double, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <8 x double>, ptr %tmp3, align 8 %tmp5 = call <8 x double> @llvm.fabs.v8f64(<8 x double> %tmp4) - %tmp6 = bitcast double* %tmp2 to <8 x double>* - store <8 x double> %tmp5, <8 x double>* %tmp6, align 8 + %tmp6 = bitcast ptr %tmp2 to ptr + store <8 x double> %tmp5, ptr %tmp6, align 8 %tmp7 = add i64 %tmp, 8 %tmp8 = icmp eq i64 %tmp7, 1024 br i1 %tmp8, label %bb9, label %bb1 @@ -969,7 +967,7 @@ bb9: ; preds = %bb1 ; Function Attrs: nounwind readnone speculatable willreturn declare <8 x double> @llvm.fabs.v8f64(<8 x double>) #0 -define void @bcast_unfold_fabs_v4f64(double* %arg) { +define void @bcast_unfold_fabs_v4f64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_fabs_v4f64: ; 
CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -989,12 +987,12 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ] - %tmp2 = getelementptr inbounds double, double* %arg, i64 %tmp - %tmp3 = bitcast double* %tmp2 to <4 x double>* - %tmp4 = load <4 x double>, <4 x double>* %tmp3, align 8 + %tmp2 = getelementptr inbounds double, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <4 x double>, ptr %tmp3, align 8 %tmp5 = call <4 x double> @llvm.fabs.v4f64(<4 x double> %tmp4) - %tmp6 = bitcast double* %tmp2 to <4 x double>* - store <4 x double> %tmp5, <4 x double>* %tmp6, align 8 + %tmp6 = bitcast ptr %tmp2 to ptr + store <4 x double> %tmp5, ptr %tmp6, align 8 %tmp7 = add i64 %tmp, 4 %tmp8 = icmp eq i64 %tmp7, 1024 br i1 %tmp8, label %bb9, label %bb1 @@ -1006,7 +1004,7 @@ bb9: ; preds = %bb1 ; Function Attrs: nounwind readnone speculatable willreturn declare <4 x double> @llvm.fabs.v4f64(<4 x double>) #0 -define void @bcast_unfold_fabs_v2f64(double* %arg) { +define void @bcast_unfold_fabs_v2f64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_fabs_v2f64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -1026,12 +1024,12 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ] - %tmp2 = getelementptr inbounds double, double* %arg, i64 %tmp - %tmp3 = bitcast double* %tmp2 to <2 x double>* - %tmp4 = load <2 x double>, <2 x double>* %tmp3, align 8 + %tmp2 = getelementptr inbounds double, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <2 x double>, ptr %tmp3, align 8 %tmp5 = call <2 x double> @llvm.fabs.v2f64(<2 x double> %tmp4) - %tmp6 = bitcast double* %tmp2 to <2 x double>* - store <2 x double> %tmp5, <2 x double>* %tmp6, align 8 + %tmp6 = bitcast ptr %tmp2 to ptr + store <2 x double> %tmp5, ptr %tmp6, align 8 %tmp7 = add i64 %tmp, 2 %tmp8 = icmp eq i64 %tmp7, 1024 br i1 %tmp8, label %bb9, label %bb1 @@ -1043,7 +1041,7 @@ bb9: ; preds = %bb1 ; Function Attrs: nounwind readnone speculatable willreturn declare <2 x double> @llvm.fabs.v2f64(<2 x double>) #0 -define void @bcast_unfold_fadd_v16f32(float* nocapture %arg) { +define void @bcast_unfold_fadd_v16f32(ptr nocapture %arg) { ; CHECK-LABEL: bcast_unfold_fadd_v16f32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -1063,12 +1061,12 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ] - %tmp2 = getelementptr inbounds float, float* %arg, i64 %tmp - %tmp3 = bitcast float* %tmp2 to <16 x float>* - %tmp4 = load <16 x float>, <16 x float>* %tmp3, align 4 + %tmp2 = getelementptr inbounds float, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <16 x float>, ptr %tmp3, align 4 %tmp5 = fadd <16 x float> %tmp4, - %tmp6 = bitcast float* %tmp2 to <16 x float>* - store <16 x float> %tmp5, <16 x float>* %tmp6, align 4 + %tmp6 = bitcast ptr %tmp2 to ptr + store <16 x float> %tmp5, ptr %tmp6, align 4 %tmp7 = add i64 %tmp, 16 %tmp8 = icmp eq i64 %tmp7, 1024 br i1 %tmp8, label %bb9, label %bb1 @@ -1077,7 +1075,7 @@ bb9: ; preds = %bb1 ret void } -define void @bcast_unfold_fadd_v8f32(float* nocapture %arg) { +define void @bcast_unfold_fadd_v8f32(ptr nocapture %arg) { ; CHECK-LABEL: bcast_unfold_fadd_v8f32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -1097,12 +1095,12 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ] - %tmp2 = getelementptr inbounds float, float* %arg, i64 %tmp - %tmp3 = bitcast float* %tmp2 to <8 x float>* - 
%tmp4 = load <8 x float>, <8 x float>* %tmp3, align 4 + %tmp2 = getelementptr inbounds float, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <8 x float>, ptr %tmp3, align 4 %tmp5 = fadd <8 x float> %tmp4, - %tmp6 = bitcast float* %tmp2 to <8 x float>* - store <8 x float> %tmp5, <8 x float>* %tmp6, align 4 + %tmp6 = bitcast ptr %tmp2 to ptr + store <8 x float> %tmp5, ptr %tmp6, align 4 %tmp7 = add i64 %tmp, 8 %tmp8 = icmp eq i64 %tmp7, 1024 br i1 %tmp8, label %bb9, label %bb1 @@ -1111,7 +1109,7 @@ bb9: ; preds = %bb1 ret void } -define void @bcast_unfold_fadd_v4f32(float* nocapture %arg) { +define void @bcast_unfold_fadd_v4f32(ptr nocapture %arg) { ; CHECK-LABEL: bcast_unfold_fadd_v4f32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -1130,12 +1128,12 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ] - %tmp2 = getelementptr inbounds float, float* %arg, i64 %tmp - %tmp3 = bitcast float* %tmp2 to <4 x float>* - %tmp4 = load <4 x float>, <4 x float>* %tmp3, align 4 + %tmp2 = getelementptr inbounds float, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <4 x float>, ptr %tmp3, align 4 %tmp5 = fadd <4 x float> %tmp4, - %tmp6 = bitcast float* %tmp2 to <4 x float>* - store <4 x float> %tmp5, <4 x float>* %tmp6, align 4 + %tmp6 = bitcast ptr %tmp2 to ptr + store <4 x float> %tmp5, ptr %tmp6, align 4 %tmp7 = add i64 %tmp, 4 %tmp8 = icmp eq i64 %tmp7, 1024 br i1 %tmp8, label %bb9, label %bb1 @@ -1144,7 +1142,7 @@ bb9: ; preds = %bb1 ret void } -define void @bcast_unfold_fadd_v8f64(double* nocapture %arg) { +define void @bcast_unfold_fadd_v8f64(ptr nocapture %arg) { ; CHECK-LABEL: bcast_unfold_fadd_v8f64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -1164,12 +1162,12 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ] - %tmp2 = getelementptr inbounds double, double* %arg, i64 %tmp - %tmp3 = bitcast double* %tmp2 to <8 x double>* - %tmp4 = load <8 x double>, <8 x double>* %tmp3, align 8 + %tmp2 = getelementptr inbounds double, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <8 x double>, ptr %tmp3, align 8 %tmp5 = fadd <8 x double> %tmp4, - %tmp6 = bitcast double* %tmp2 to <8 x double>* - store <8 x double> %tmp5, <8 x double>* %tmp6, align 8 + %tmp6 = bitcast ptr %tmp2 to ptr + store <8 x double> %tmp5, ptr %tmp6, align 8 %tmp7 = add i64 %tmp, 8 %tmp8 = icmp eq i64 %tmp7, 1024 br i1 %tmp8, label %bb9, label %bb1 @@ -1178,7 +1176,7 @@ bb9: ; preds = %bb1 ret void } -define void @bcast_unfold_fadd_v4f64(double* nocapture %arg) { +define void @bcast_unfold_fadd_v4f64(ptr nocapture %arg) { ; CHECK-LABEL: bcast_unfold_fadd_v4f64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -1198,12 +1196,12 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ] - %tmp2 = getelementptr inbounds double, double* %arg, i64 %tmp - %tmp3 = bitcast double* %tmp2 to <4 x double>* - %tmp4 = load <4 x double>, <4 x double>* %tmp3, align 8 + %tmp2 = getelementptr inbounds double, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <4 x double>, ptr %tmp3, align 8 %tmp5 = fadd <4 x double> %tmp4, - %tmp6 = bitcast double* %tmp2 to <4 x double>* - store <4 x double> %tmp5, <4 x double>* %tmp6, align 8 + %tmp6 = bitcast ptr %tmp2 to ptr + store <4 x double> %tmp5, ptr %tmp6, align 8 %tmp7 = add i64 %tmp, 4 %tmp8 = icmp eq i64 %tmp7, 1024 br i1 %tmp8, label %bb9, label %bb1 @@ -1212,7 +1210,7 @@ bb9: ; preds 
= %bb1 ret void } -define void @bcast_unfold_fadd_v2f64(double* nocapture %arg) { +define void @bcast_unfold_fadd_v2f64(ptr nocapture %arg) { ; CHECK-LABEL: bcast_unfold_fadd_v2f64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -1232,12 +1230,12 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ] - %tmp2 = getelementptr inbounds double, double* %arg, i64 %tmp - %tmp3 = bitcast double* %tmp2 to <2 x double>* - %tmp4 = load <2 x double>, <2 x double>* %tmp3, align 8 + %tmp2 = getelementptr inbounds double, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <2 x double>, ptr %tmp3, align 8 %tmp5 = fadd <2 x double> %tmp4, - %tmp6 = bitcast double* %tmp2 to <2 x double>* - store <2 x double> %tmp5, <2 x double>* %tmp6, align 8 + %tmp6 = bitcast ptr %tmp2 to ptr + store <2 x double> %tmp5, ptr %tmp6, align 8 %tmp7 = add i64 %tmp, 2 %tmp8 = icmp eq i64 %tmp7, 1024 br i1 %tmp8, label %bb9, label %bb1 @@ -1246,7 +1244,7 @@ bb9: ; preds = %bb1 ret void } -define void @bcast_unfold_fmul_v16f32(float* nocapture %arg) { +define void @bcast_unfold_fmul_v16f32(ptr nocapture %arg) { ; CHECK-LABEL: bcast_unfold_fmul_v16f32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -1266,12 +1264,12 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ] - %tmp2 = getelementptr inbounds float, float* %arg, i64 %tmp - %tmp3 = bitcast float* %tmp2 to <16 x float>* - %tmp4 = load <16 x float>, <16 x float>* %tmp3, align 4 + %tmp2 = getelementptr inbounds float, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <16 x float>, ptr %tmp3, align 4 %tmp5 = fmul <16 x float> %tmp4, - %tmp6 = bitcast float* %tmp2 to <16 x float>* - store <16 x float> %tmp5, <16 x float>* %tmp6, align 4 + %tmp6 = bitcast ptr %tmp2 to ptr + store <16 x float> %tmp5, ptr %tmp6, align 4 %tmp7 = add i64 %tmp, 16 %tmp8 = icmp eq i64 %tmp7, 1024 br i1 %tmp8, label %bb9, label %bb1 @@ -1280,7 +1278,7 @@ bb9: ; preds = %bb1 ret void } -define void @bcast_unfold_fmul_v8f32(float* nocapture %arg) { +define void @bcast_unfold_fmul_v8f32(ptr nocapture %arg) { ; CHECK-LABEL: bcast_unfold_fmul_v8f32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -1300,12 +1298,12 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ] - %tmp2 = getelementptr inbounds float, float* %arg, i64 %tmp - %tmp3 = bitcast float* %tmp2 to <8 x float>* - %tmp4 = load <8 x float>, <8 x float>* %tmp3, align 4 + %tmp2 = getelementptr inbounds float, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <8 x float>, ptr %tmp3, align 4 %tmp5 = fmul <8 x float> %tmp4, - %tmp6 = bitcast float* %tmp2 to <8 x float>* - store <8 x float> %tmp5, <8 x float>* %tmp6, align 4 + %tmp6 = bitcast ptr %tmp2 to ptr + store <8 x float> %tmp5, ptr %tmp6, align 4 %tmp7 = add i64 %tmp, 8 %tmp8 = icmp eq i64 %tmp7, 1024 br i1 %tmp8, label %bb9, label %bb1 @@ -1314,7 +1312,7 @@ bb9: ; preds = %bb1 ret void } -define void @bcast_unfold_fmul_v4f32(float* nocapture %arg) { +define void @bcast_unfold_fmul_v4f32(ptr nocapture %arg) { ; CHECK-LABEL: bcast_unfold_fmul_v4f32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -1333,12 +1331,12 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ] - %tmp2 = getelementptr inbounds float, float* %arg, i64 %tmp - %tmp3 = bitcast float* %tmp2 to <4 x float>* - %tmp4 = load <4 x float>, <4 x float>* %tmp3, align 4 + %tmp2 = getelementptr 
inbounds float, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <4 x float>, ptr %tmp3, align 4 %tmp5 = fmul <4 x float> %tmp4, - %tmp6 = bitcast float* %tmp2 to <4 x float>* - store <4 x float> %tmp5, <4 x float>* %tmp6, align 4 + %tmp6 = bitcast ptr %tmp2 to ptr + store <4 x float> %tmp5, ptr %tmp6, align 4 %tmp7 = add i64 %tmp, 4 %tmp8 = icmp eq i64 %tmp7, 1024 br i1 %tmp8, label %bb9, label %bb1 @@ -1347,7 +1345,7 @@ bb9: ; preds = %bb1 ret void } -define void @bcast_unfold_fmul_v8f64(double* nocapture %arg) { +define void @bcast_unfold_fmul_v8f64(ptr nocapture %arg) { ; CHECK-LABEL: bcast_unfold_fmul_v8f64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -1367,12 +1365,12 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ] - %tmp2 = getelementptr inbounds double, double* %arg, i64 %tmp - %tmp3 = bitcast double* %tmp2 to <8 x double>* - %tmp4 = load <8 x double>, <8 x double>* %tmp3, align 8 + %tmp2 = getelementptr inbounds double, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <8 x double>, ptr %tmp3, align 8 %tmp5 = fmul <8 x double> %tmp4, - %tmp6 = bitcast double* %tmp2 to <8 x double>* - store <8 x double> %tmp5, <8 x double>* %tmp6, align 8 + %tmp6 = bitcast ptr %tmp2 to ptr + store <8 x double> %tmp5, ptr %tmp6, align 8 %tmp7 = add i64 %tmp, 8 %tmp8 = icmp eq i64 %tmp7, 1024 br i1 %tmp8, label %bb9, label %bb1 @@ -1381,7 +1379,7 @@ bb9: ; preds = %bb1 ret void } -define void @bcast_unfold_fmul_v4f64(double* nocapture %arg) { +define void @bcast_unfold_fmul_v4f64(ptr nocapture %arg) { ; CHECK-LABEL: bcast_unfold_fmul_v4f64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -1401,12 +1399,12 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ] - %tmp2 = getelementptr inbounds double, double* %arg, i64 %tmp - %tmp3 = bitcast double* %tmp2 to <4 x double>* - %tmp4 = load <4 x double>, <4 x double>* %tmp3, align 8 + %tmp2 = getelementptr inbounds double, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <4 x double>, ptr %tmp3, align 8 %tmp5 = fmul <4 x double> %tmp4, - %tmp6 = bitcast double* %tmp2 to <4 x double>* - store <4 x double> %tmp5, <4 x double>* %tmp6, align 8 + %tmp6 = bitcast ptr %tmp2 to ptr + store <4 x double> %tmp5, ptr %tmp6, align 8 %tmp7 = add i64 %tmp, 4 %tmp8 = icmp eq i64 %tmp7, 1024 br i1 %tmp8, label %bb9, label %bb1 @@ -1415,7 +1413,7 @@ bb9: ; preds = %bb1 ret void } -define void @bcast_unfold_fmul_v2f64(double* nocapture %arg) { +define void @bcast_unfold_fmul_v2f64(ptr nocapture %arg) { ; CHECK-LABEL: bcast_unfold_fmul_v2f64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -1435,12 +1433,12 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ] - %tmp2 = getelementptr inbounds double, double* %arg, i64 %tmp - %tmp3 = bitcast double* %tmp2 to <2 x double>* - %tmp4 = load <2 x double>, <2 x double>* %tmp3, align 8 + %tmp2 = getelementptr inbounds double, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <2 x double>, ptr %tmp3, align 8 %tmp5 = fmul <2 x double> %tmp4, - %tmp6 = bitcast double* %tmp2 to <2 x double>* - store <2 x double> %tmp5, <2 x double>* %tmp6, align 8 + %tmp6 = bitcast ptr %tmp2 to ptr + store <2 x double> %tmp5, ptr %tmp6, align 8 %tmp7 = add i64 %tmp, 2 %tmp8 = icmp eq i64 %tmp7, 1024 br i1 %tmp8, label %bb9, label %bb1 @@ -1449,7 +1447,7 @@ bb9: ; preds = %bb1 ret void } -define void 
@bcast_unfold_fdiv_v16f32(float* nocapture %arg) { +define void @bcast_unfold_fdiv_v16f32(ptr nocapture %arg) { ; CHECK-LABEL: bcast_unfold_fdiv_v16f32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -1470,12 +1468,12 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ] - %tmp2 = getelementptr inbounds float, float* %arg, i64 %tmp - %tmp3 = bitcast float* %tmp2 to <16 x float>* - %tmp4 = load <16 x float>, <16 x float>* %tmp3, align 4 + %tmp2 = getelementptr inbounds float, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <16 x float>, ptr %tmp3, align 4 %tmp5 = fdiv <16 x float> %tmp4, - %tmp6 = bitcast float* %tmp2 to <16 x float>* - store <16 x float> %tmp5, <16 x float>* %tmp6, align 4 + %tmp6 = bitcast ptr %tmp2 to ptr + store <16 x float> %tmp5, ptr %tmp6, align 4 %tmp7 = add i64 %tmp, 16 %tmp8 = icmp eq i64 %tmp7, 1024 br i1 %tmp8, label %bb9, label %bb1 @@ -1484,7 +1482,7 @@ bb9: ; preds = %bb1 ret void } -define void @bcast_unfold_fdiv_v8f32(float* nocapture %arg) { +define void @bcast_unfold_fdiv_v8f32(ptr nocapture %arg) { ; CHECK-LABEL: bcast_unfold_fdiv_v8f32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -1505,12 +1503,12 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ] - %tmp2 = getelementptr inbounds float, float* %arg, i64 %tmp - %tmp3 = bitcast float* %tmp2 to <8 x float>* - %tmp4 = load <8 x float>, <8 x float>* %tmp3, align 4 + %tmp2 = getelementptr inbounds float, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <8 x float>, ptr %tmp3, align 4 %tmp5 = fdiv <8 x float> %tmp4, - %tmp6 = bitcast float* %tmp2 to <8 x float>* - store <8 x float> %tmp5, <8 x float>* %tmp6, align 4 + %tmp6 = bitcast ptr %tmp2 to ptr + store <8 x float> %tmp5, ptr %tmp6, align 4 %tmp7 = add i64 %tmp, 8 %tmp8 = icmp eq i64 %tmp7, 1024 br i1 %tmp8, label %bb9, label %bb1 @@ -1519,7 +1517,7 @@ bb9: ; preds = %bb1 ret void } -define void @bcast_unfold_fdiv_v4f32(float* nocapture %arg) { +define void @bcast_unfold_fdiv_v4f32(ptr nocapture %arg) { ; CHECK-LABEL: bcast_unfold_fdiv_v4f32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -1539,12 +1537,12 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ] - %tmp2 = getelementptr inbounds float, float* %arg, i64 %tmp - %tmp3 = bitcast float* %tmp2 to <4 x float>* - %tmp4 = load <4 x float>, <4 x float>* %tmp3, align 4 + %tmp2 = getelementptr inbounds float, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <4 x float>, ptr %tmp3, align 4 %tmp5 = fdiv <4 x float> %tmp4, - %tmp6 = bitcast float* %tmp2 to <4 x float>* - store <4 x float> %tmp5, <4 x float>* %tmp6, align 4 + %tmp6 = bitcast ptr %tmp2 to ptr + store <4 x float> %tmp5, ptr %tmp6, align 4 %tmp7 = add i64 %tmp, 4 %tmp8 = icmp eq i64 %tmp7, 1024 br i1 %tmp8, label %bb9, label %bb1 @@ -1553,7 +1551,7 @@ bb9: ; preds = %bb1 ret void } -define void @bcast_unfold_fdiv_v8f64(double* nocapture %arg) { +define void @bcast_unfold_fdiv_v8f64(ptr nocapture %arg) { ; CHECK-LABEL: bcast_unfold_fdiv_v8f64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -1574,12 +1572,12 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ] - %tmp2 = getelementptr inbounds double, double* %arg, i64 %tmp - %tmp3 = bitcast double* %tmp2 to <8 x double>* - %tmp4 = load <8 x double>, <8 x double>* %tmp3, align 8 + %tmp2 = getelementptr inbounds double, ptr %arg, i64 %tmp + 
%tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <8 x double>, ptr %tmp3, align 8 %tmp5 = fdiv <8 x double> %tmp4, - %tmp6 = bitcast double* %tmp2 to <8 x double>* - store <8 x double> %tmp5, <8 x double>* %tmp6, align 8 + %tmp6 = bitcast ptr %tmp2 to ptr + store <8 x double> %tmp5, ptr %tmp6, align 8 %tmp7 = add i64 %tmp, 8 %tmp8 = icmp eq i64 %tmp7, 1024 br i1 %tmp8, label %bb9, label %bb1 @@ -1588,7 +1586,7 @@ bb9: ; preds = %bb1 ret void } -define void @bcast_unfold_fdiv_v4f64(double* nocapture %arg) { +define void @bcast_unfold_fdiv_v4f64(ptr nocapture %arg) { ; CHECK-LABEL: bcast_unfold_fdiv_v4f64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -1609,12 +1607,12 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ] - %tmp2 = getelementptr inbounds double, double* %arg, i64 %tmp - %tmp3 = bitcast double* %tmp2 to <4 x double>* - %tmp4 = load <4 x double>, <4 x double>* %tmp3, align 8 + %tmp2 = getelementptr inbounds double, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <4 x double>, ptr %tmp3, align 8 %tmp5 = fdiv <4 x double> %tmp4, - %tmp6 = bitcast double* %tmp2 to <4 x double>* - store <4 x double> %tmp5, <4 x double>* %tmp6, align 8 + %tmp6 = bitcast ptr %tmp2 to ptr + store <4 x double> %tmp5, ptr %tmp6, align 8 %tmp7 = add i64 %tmp, 4 %tmp8 = icmp eq i64 %tmp7, 1024 br i1 %tmp8, label %bb9, label %bb1 @@ -1623,7 +1621,7 @@ bb9: ; preds = %bb1 ret void } -define void @bcast_unfold_fdiv_v2f64(double* nocapture %arg) { +define void @bcast_unfold_fdiv_v2f64(ptr nocapture %arg) { ; CHECK-LABEL: bcast_unfold_fdiv_v2f64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -1644,12 +1642,12 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ] - %tmp2 = getelementptr inbounds double, double* %arg, i64 %tmp - %tmp3 = bitcast double* %tmp2 to <2 x double>* - %tmp4 = load <2 x double>, <2 x double>* %tmp3, align 8 + %tmp2 = getelementptr inbounds double, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <2 x double>, ptr %tmp3, align 8 %tmp5 = fdiv <2 x double> %tmp4, - %tmp6 = bitcast double* %tmp2 to <2 x double>* - store <2 x double> %tmp5, <2 x double>* %tmp6, align 8 + %tmp6 = bitcast ptr %tmp2 to ptr + store <2 x double> %tmp5, ptr %tmp6, align 8 %tmp7 = add i64 %tmp, 2 %tmp8 = icmp eq i64 %tmp7, 1024 br i1 %tmp8, label %bb9, label %bb1 @@ -1658,7 +1656,7 @@ bb9: ; preds = %bb1 ret void } -define void @bcast_unfold_fma213_v4f32(float* %arg) { +define void @bcast_unfold_fma213_v4f32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_fma213_v4f32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -1678,13 +1676,13 @@ bb: bb2: ; preds = %bb2, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp9, %bb2 ] - %tmp3 = getelementptr inbounds float, float* %arg, i64 %tmp - %tmp4 = bitcast float* %tmp3 to <4 x float>* - %tmp5 = load <4 x float>, <4 x float>* %tmp4, align 4 + %tmp3 = getelementptr inbounds float, ptr %arg, i64 %tmp + %tmp4 = bitcast ptr %tmp3 to ptr + %tmp5 = load <4 x float>, ptr %tmp4, align 4 %tmp6 = fmul contract <4 x float> %tmp5, %tmp5 %tmp7 = fadd contract <4 x float> %tmp6, - %tmp8 = bitcast float* %tmp3 to <4 x float>* - store <4 x float> %tmp7, <4 x float>* %tmp8, align 4 + %tmp8 = bitcast ptr %tmp3 to ptr + store <4 x float> %tmp7, ptr %tmp8, align 4 %tmp9 = add i64 %tmp, 4 %tmp10 = icmp eq i64 %tmp9, 1024 br i1 %tmp10, label %bb11, label %bb2 @@ -1693,7 +1691,7 @@ bb11: ; preds = %bb2 ret void } -define void 
@bcast_unfold_fma231_v4f32(float* %arg) { +define void @bcast_unfold_fma231_v4f32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_fma231_v4f32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -1713,13 +1711,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds float, float* %arg, i64 %tmp - %tmp3 = bitcast float* %tmp2 to <4 x float>* - %tmp4 = load <4 x float>, <4 x float>* %tmp3, align 4 + %tmp2 = getelementptr inbounds float, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <4 x float>, ptr %tmp3, align 4 %tmp5 = fmul contract <4 x float> %tmp4, %tmp6 = fadd contract <4 x float> %tmp4, %tmp5 - %tmp7 = bitcast float* %tmp2 to <4 x float>* - store <4 x float> %tmp6, <4 x float>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <4 x float> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 4 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -1728,7 +1726,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_fma213_v8f32(float* %arg) { +define void @bcast_unfold_fma213_v8f32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_fma213_v8f32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -1749,13 +1747,13 @@ bb: bb2: ; preds = %bb2, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp9, %bb2 ] - %tmp3 = getelementptr inbounds float, float* %arg, i64 %tmp - %tmp4 = bitcast float* %tmp3 to <8 x float>* - %tmp5 = load <8 x float>, <8 x float>* %tmp4, align 4 + %tmp3 = getelementptr inbounds float, ptr %arg, i64 %tmp + %tmp4 = bitcast ptr %tmp3 to ptr + %tmp5 = load <8 x float>, ptr %tmp4, align 4 %tmp6 = fmul contract <8 x float> %tmp5, %tmp5 %tmp7 = fadd contract <8 x float> %tmp6, - %tmp8 = bitcast float* %tmp3 to <8 x float>* - store <8 x float> %tmp7, <8 x float>* %tmp8, align 4 + %tmp8 = bitcast ptr %tmp3 to ptr + store <8 x float> %tmp7, ptr %tmp8, align 4 %tmp9 = add i64 %tmp, 8 %tmp10 = icmp eq i64 %tmp9, 1024 br i1 %tmp10, label %bb11, label %bb2 @@ -1764,7 +1762,7 @@ bb11: ; preds = %bb2 ret void } -define void @bcast_unfold_fma231_v8f32(float* %arg) { +define void @bcast_unfold_fma231_v8f32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_fma231_v8f32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -1785,13 +1783,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds float, float* %arg, i64 %tmp - %tmp3 = bitcast float* %tmp2 to <8 x float>* - %tmp4 = load <8 x float>, <8 x float>* %tmp3, align 4 + %tmp2 = getelementptr inbounds float, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <8 x float>, ptr %tmp3, align 4 %tmp5 = fmul contract <8 x float> %tmp4, %tmp6 = fadd contract <8 x float> %tmp4, %tmp5 - %tmp7 = bitcast float* %tmp2 to <8 x float>* - store <8 x float> %tmp6, <8 x float>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <8 x float> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 8 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -1800,7 +1798,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_fma213_v16f32(float* %arg) { +define void @bcast_unfold_fma213_v16f32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_fma213_v16f32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -1821,13 +1819,13 @@ bb: bb2: ; preds = %bb2, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp9, %bb2 ] - %tmp3 = getelementptr inbounds float, float* %arg, i64 %tmp - %tmp4 = bitcast float* %tmp3 to <16 x float>* - %tmp5 = load 
<16 x float>, <16 x float>* %tmp4, align 4 + %tmp3 = getelementptr inbounds float, ptr %arg, i64 %tmp + %tmp4 = bitcast ptr %tmp3 to ptr + %tmp5 = load <16 x float>, ptr %tmp4, align 4 %tmp6 = fmul contract <16 x float> %tmp5, %tmp5 %tmp7 = fadd contract <16 x float> %tmp6, - %tmp8 = bitcast float* %tmp3 to <16 x float>* - store <16 x float> %tmp7, <16 x float>* %tmp8, align 4 + %tmp8 = bitcast ptr %tmp3 to ptr + store <16 x float> %tmp7, ptr %tmp8, align 4 %tmp9 = add i64 %tmp, 16 %tmp10 = icmp eq i64 %tmp9, 1024 br i1 %tmp10, label %bb11, label %bb2 @@ -1836,7 +1834,7 @@ bb11: ; preds = %bb2 ret void } -define void @bcast_unfold_fma231_v16f32(float* %arg) { +define void @bcast_unfold_fma231_v16f32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_fma231_v16f32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -1857,13 +1855,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds float, float* %arg, i64 %tmp - %tmp3 = bitcast float* %tmp2 to <16 x float>* - %tmp4 = load <16 x float>, <16 x float>* %tmp3, align 4 + %tmp2 = getelementptr inbounds float, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <16 x float>, ptr %tmp3, align 4 %tmp5 = fmul contract <16 x float> %tmp4, %tmp6 = fadd contract <16 x float> %tmp4, %tmp5 - %tmp7 = bitcast float* %tmp2 to <16 x float>* - store <16 x float> %tmp6, <16 x float>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <16 x float> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 16 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -1872,7 +1870,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_fma213_v2f64(double* %arg) { +define void @bcast_unfold_fma213_v2f64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_fma213_v2f64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -1893,13 +1891,13 @@ bb: bb2: ; preds = %bb2, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp9, %bb2 ] - %tmp3 = getelementptr inbounds double, double* %arg, i64 %tmp - %tmp4 = bitcast double* %tmp3 to <2 x double>* - %tmp5 = load <2 x double>, <2 x double>* %tmp4, align 4 + %tmp3 = getelementptr inbounds double, ptr %arg, i64 %tmp + %tmp4 = bitcast ptr %tmp3 to ptr + %tmp5 = load <2 x double>, ptr %tmp4, align 4 %tmp6 = fmul contract <2 x double> %tmp5, %tmp5 %tmp7 = fadd contract <2 x double> %tmp6, - %tmp8 = bitcast double* %tmp3 to <2 x double>* - store <2 x double> %tmp7, <2 x double>* %tmp8, align 8 + %tmp8 = bitcast ptr %tmp3 to ptr + store <2 x double> %tmp7, ptr %tmp8, align 8 %tmp9 = add i64 %tmp, 2 %tmp10 = icmp eq i64 %tmp9, 1024 br i1 %tmp10, label %bb11, label %bb2 @@ -1908,7 +1906,7 @@ bb11: ; preds = %bb2 ret void } -define void @bcast_unfold_fma231_v2f64(double* %arg) { +define void @bcast_unfold_fma231_v2f64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_fma231_v2f64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -1929,13 +1927,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds double, double* %arg, i64 %tmp - %tmp3 = bitcast double* %tmp2 to <2 x double>* - %tmp4 = load <2 x double>, <2 x double>* %tmp3, align 8 + %tmp2 = getelementptr inbounds double, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <2 x double>, ptr %tmp3, align 8 %tmp5 = fmul contract <2 x double> %tmp4, %tmp6 = fadd contract <2 x double> %tmp4, %tmp5 - %tmp7 = bitcast double* %tmp2 to <2 x double>* - store <2 x double> %tmp6, <2 x double>* %tmp7, align 8 + 
%tmp7 = bitcast ptr %tmp2 to ptr + store <2 x double> %tmp6, ptr %tmp7, align 8 %tmp8 = add i64 %tmp, 2 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -1944,7 +1942,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_fma213_v4f64(double* %arg) { +define void @bcast_unfold_fma213_v4f64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_fma213_v4f64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -1965,13 +1963,13 @@ bb: bb2: ; preds = %bb2, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp9, %bb2 ] - %tmp3 = getelementptr inbounds double, double* %arg, i64 %tmp - %tmp4 = bitcast double* %tmp3 to <4 x double>* - %tmp5 = load <4 x double>, <4 x double>* %tmp4, align 8 + %tmp3 = getelementptr inbounds double, ptr %arg, i64 %tmp + %tmp4 = bitcast ptr %tmp3 to ptr + %tmp5 = load <4 x double>, ptr %tmp4, align 8 %tmp6 = fmul contract <4 x double> %tmp5, %tmp5 %tmp7 = fadd contract <4 x double> %tmp6, - %tmp8 = bitcast double* %tmp3 to <4 x double>* - store <4 x double> %tmp7, <4 x double>* %tmp8, align 8 + %tmp8 = bitcast ptr %tmp3 to ptr + store <4 x double> %tmp7, ptr %tmp8, align 8 %tmp9 = add i64 %tmp, 4 %tmp10 = icmp eq i64 %tmp9, 1024 br i1 %tmp10, label %bb11, label %bb2 @@ -1980,7 +1978,7 @@ bb11: ; preds = %bb2 ret void } -define void @bcast_unfold_fma231_v4f64(double* %arg) { +define void @bcast_unfold_fma231_v4f64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_fma231_v4f64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -2001,13 +1999,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds double, double* %arg, i64 %tmp - %tmp3 = bitcast double* %tmp2 to <4 x double>* - %tmp4 = load <4 x double>, <4 x double>* %tmp3, align 8 + %tmp2 = getelementptr inbounds double, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <4 x double>, ptr %tmp3, align 8 %tmp5 = fmul contract <4 x double> %tmp4, %tmp6 = fadd contract <4 x double> %tmp4, %tmp5 - %tmp7 = bitcast double* %tmp2 to <4 x double>* - store <4 x double> %tmp6, <4 x double>* %tmp7, align 8 + %tmp7 = bitcast ptr %tmp2 to ptr + store <4 x double> %tmp6, ptr %tmp7, align 8 %tmp8 = add i64 %tmp, 4 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -2016,7 +2014,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_fma213_v8f64(double* %arg) { +define void @bcast_unfold_fma213_v8f64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_fma213_v8f64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -2037,13 +2035,13 @@ bb: bb2: ; preds = %bb2, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp9, %bb2 ] - %tmp3 = getelementptr inbounds double, double* %arg, i64 %tmp - %tmp4 = bitcast double* %tmp3 to <8 x double>* - %tmp5 = load <8 x double>, <8 x double>* %tmp4, align 8 + %tmp3 = getelementptr inbounds double, ptr %arg, i64 %tmp + %tmp4 = bitcast ptr %tmp3 to ptr + %tmp5 = load <8 x double>, ptr %tmp4, align 8 %tmp6 = fmul contract <8 x double> %tmp5, %tmp5 %tmp7 = fadd contract <8 x double> %tmp6, - %tmp8 = bitcast double* %tmp3 to <8 x double>* - store <8 x double> %tmp7, <8 x double>* %tmp8, align 8 + %tmp8 = bitcast ptr %tmp3 to ptr + store <8 x double> %tmp7, ptr %tmp8, align 8 %tmp9 = add i64 %tmp, 8 %tmp10 = icmp eq i64 %tmp9, 1024 br i1 %tmp10, label %bb11, label %bb2 @@ -2052,7 +2050,7 @@ bb11: ; preds = %bb2 ret void } -define void @bcast_unfold_fma231_v8f64(double* %arg) { +define void @bcast_unfold_fma231_v8f64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_fma231_v8f64: ; 
CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -2073,13 +2071,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds double, double* %arg, i64 %tmp - %tmp3 = bitcast double* %tmp2 to <8 x double>* - %tmp4 = load <8 x double>, <8 x double>* %tmp3, align 8 + %tmp2 = getelementptr inbounds double, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <8 x double>, ptr %tmp3, align 8 %tmp5 = fmul contract <8 x double> %tmp4, %tmp6 = fadd contract <8 x double> %tmp4, %tmp5 - %tmp7 = bitcast double* %tmp2 to <8 x double>* - store <8 x double> %tmp6, <8 x double>* %tmp7, align 8 + %tmp7 = bitcast ptr %tmp2 to ptr + store <8 x double> %tmp6, ptr %tmp7, align 8 %tmp8 = add i64 %tmp, 8 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -2088,7 +2086,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_fmax_v4f32(float* %arg) { +define void @bcast_unfold_fmax_v4f32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_fmax_v4f32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -2108,13 +2106,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds float, float* %arg, i64 %tmp - %tmp3 = bitcast float* %tmp2 to <4 x float>* - %tmp4 = load <4 x float>, <4 x float>* %tmp3, align 4 + %tmp2 = getelementptr inbounds float, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <4 x float>, ptr %tmp3, align 4 %tmp5 = fcmp ogt <4 x float> %tmp4, %tmp6 = select <4 x i1> %tmp5, <4 x float> %tmp4, <4 x float> - %tmp7 = bitcast float* %tmp2 to <4 x float>* - store <4 x float> %tmp6, <4 x float>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <4 x float> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 4 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -2123,7 +2121,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_fmax_v8f32(float* %arg) { +define void @bcast_unfold_fmax_v8f32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_fmax_v8f32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -2144,13 +2142,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds float, float* %arg, i64 %tmp - %tmp3 = bitcast float* %tmp2 to <8 x float>* - %tmp4 = load <8 x float>, <8 x float>* %tmp3, align 4 + %tmp2 = getelementptr inbounds float, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <8 x float>, ptr %tmp3, align 4 %tmp5 = fcmp ogt <8 x float> %tmp4, %tmp6 = select <8 x i1> %tmp5, <8 x float> %tmp4, <8 x float> - %tmp7 = bitcast float* %tmp2 to <8 x float>* - store <8 x float> %tmp6, <8 x float>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <8 x float> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 8 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -2159,7 +2157,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_fmax_v16f32(float* %arg) { +define void @bcast_unfold_fmax_v16f32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_fmax_v16f32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -2180,13 +2178,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds float, float* %arg, i64 %tmp - %tmp3 = bitcast float* %tmp2 to <16 x float>* - %tmp4 = load <16 x float>, <16 x float>* %tmp3, align 4 + %tmp2 = getelementptr inbounds float, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr 
%tmp2 to ptr + %tmp4 = load <16 x float>, ptr %tmp3, align 4 %tmp5 = fcmp ogt <16 x float> %tmp4, %tmp6 = select <16 x i1> %tmp5, <16 x float> %tmp4, <16 x float> - %tmp7 = bitcast float* %tmp2 to <16 x float>* - store <16 x float> %tmp6, <16 x float>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <16 x float> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 16 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -2195,7 +2193,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_fmax_v2f64(double* %arg) { +define void @bcast_unfold_fmax_v2f64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_fmax_v2f64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -2216,13 +2214,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds double, double* %arg, i64 %tmp - %tmp3 = bitcast double* %tmp2 to <2 x double>* - %tmp4 = load <2 x double>, <2 x double>* %tmp3, align 8 + %tmp2 = getelementptr inbounds double, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <2 x double>, ptr %tmp3, align 8 %tmp5 = fcmp ogt <2 x double> %tmp4, %tmp6 = select <2 x i1> %tmp5, <2 x double> %tmp4, <2 x double> - %tmp7 = bitcast double* %tmp2 to <2 x double>* - store <2 x double> %tmp6, <2 x double>* %tmp7, align 8 + %tmp7 = bitcast ptr %tmp2 to ptr + store <2 x double> %tmp6, ptr %tmp7, align 8 %tmp8 = add i64 %tmp, 2 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -2231,7 +2229,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_fmax_v4f64(double* %arg) { +define void @bcast_unfold_fmax_v4f64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_fmax_v4f64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -2252,13 +2250,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds double, double* %arg, i64 %tmp - %tmp3 = bitcast double* %tmp2 to <4 x double>* - %tmp4 = load <4 x double>, <4 x double>* %tmp3, align 8 + %tmp2 = getelementptr inbounds double, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <4 x double>, ptr %tmp3, align 8 %tmp5 = fcmp ogt <4 x double> %tmp4, %tmp6 = select <4 x i1> %tmp5, <4 x double> %tmp4, <4 x double> - %tmp7 = bitcast double* %tmp2 to <4 x double>* - store <4 x double> %tmp6, <4 x double>* %tmp7, align 8 + %tmp7 = bitcast ptr %tmp2 to ptr + store <4 x double> %tmp6, ptr %tmp7, align 8 %tmp8 = add i64 %tmp, 4 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -2267,7 +2265,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_fmax_v8f64(double* %arg) { +define void @bcast_unfold_fmax_v8f64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_fmax_v8f64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -2288,13 +2286,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds double, double* %arg, i64 %tmp - %tmp3 = bitcast double* %tmp2 to <8 x double>* - %tmp4 = load <8 x double>, <8 x double>* %tmp3, align 8 + %tmp2 = getelementptr inbounds double, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <8 x double>, ptr %tmp3, align 8 %tmp5 = fcmp ogt <8 x double> %tmp4, %tmp6 = select <8 x i1> %tmp5, <8 x double> %tmp4, <8 x double> - %tmp7 = bitcast double* %tmp2 to <8 x double>* - store <8 x double> %tmp6, <8 x double>* %tmp7, align 8 + %tmp7 = bitcast ptr %tmp2 to ptr + store <8 x double> %tmp6, ptr %tmp7, align 8 %tmp8 = add i64 
%tmp, 8 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -2303,7 +2301,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_fmin_v4f32(float* %arg) { +define void @bcast_unfold_fmin_v4f32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_fmin_v4f32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -2323,13 +2321,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds float, float* %arg, i64 %tmp - %tmp3 = bitcast float* %tmp2 to <4 x float>* - %tmp4 = load <4 x float>, <4 x float>* %tmp3, align 4 + %tmp2 = getelementptr inbounds float, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <4 x float>, ptr %tmp3, align 4 %tmp5 = fcmp olt <4 x float> %tmp4, %tmp6 = select <4 x i1> %tmp5, <4 x float> %tmp4, <4 x float> - %tmp7 = bitcast float* %tmp2 to <4 x float>* - store <4 x float> %tmp6, <4 x float>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <4 x float> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 4 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -2338,7 +2336,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_fmin_v8f32(float* %arg) { +define void @bcast_unfold_fmin_v8f32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_fmin_v8f32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -2359,13 +2357,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds float, float* %arg, i64 %tmp - %tmp3 = bitcast float* %tmp2 to <8 x float>* - %tmp4 = load <8 x float>, <8 x float>* %tmp3, align 4 + %tmp2 = getelementptr inbounds float, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <8 x float>, ptr %tmp3, align 4 %tmp5 = fcmp olt <8 x float> %tmp4, %tmp6 = select <8 x i1> %tmp5, <8 x float> %tmp4, <8 x float> - %tmp7 = bitcast float* %tmp2 to <8 x float>* - store <8 x float> %tmp6, <8 x float>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <8 x float> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 8 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -2374,7 +2372,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_fmin_v16f32(float* %arg) { +define void @bcast_unfold_fmin_v16f32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_fmin_v16f32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -2395,13 +2393,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds float, float* %arg, i64 %tmp - %tmp3 = bitcast float* %tmp2 to <16 x float>* - %tmp4 = load <16 x float>, <16 x float>* %tmp3, align 4 + %tmp2 = getelementptr inbounds float, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <16 x float>, ptr %tmp3, align 4 %tmp5 = fcmp olt <16 x float> %tmp4, %tmp6 = select <16 x i1> %tmp5, <16 x float> %tmp4, <16 x float> - %tmp7 = bitcast float* %tmp2 to <16 x float>* - store <16 x float> %tmp6, <16 x float>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <16 x float> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 16 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -2410,7 +2408,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_fmin_v2f64(double* %arg) { +define void @bcast_unfold_fmin_v2f64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_fmin_v2f64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -2431,13 +2429,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = 
phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds double, double* %arg, i64 %tmp - %tmp3 = bitcast double* %tmp2 to <2 x double>* - %tmp4 = load <2 x double>, <2 x double>* %tmp3, align 8 + %tmp2 = getelementptr inbounds double, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <2 x double>, ptr %tmp3, align 8 %tmp5 = fcmp olt <2 x double> %tmp4, %tmp6 = select <2 x i1> %tmp5, <2 x double> %tmp4, <2 x double> - %tmp7 = bitcast double* %tmp2 to <2 x double>* - store <2 x double> %tmp6, <2 x double>* %tmp7, align 8 + %tmp7 = bitcast ptr %tmp2 to ptr + store <2 x double> %tmp6, ptr %tmp7, align 8 %tmp8 = add i64 %tmp, 2 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -2446,7 +2444,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_fmin_v4f64(double* %arg) { +define void @bcast_unfold_fmin_v4f64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_fmin_v4f64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -2467,13 +2465,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds double, double* %arg, i64 %tmp - %tmp3 = bitcast double* %tmp2 to <4 x double>* - %tmp4 = load <4 x double>, <4 x double>* %tmp3, align 8 + %tmp2 = getelementptr inbounds double, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <4 x double>, ptr %tmp3, align 8 %tmp5 = fcmp olt <4 x double> %tmp4, %tmp6 = select <4 x i1> %tmp5, <4 x double> %tmp4, <4 x double> - %tmp7 = bitcast double* %tmp2 to <4 x double>* - store <4 x double> %tmp6, <4 x double>* %tmp7, align 8 + %tmp7 = bitcast ptr %tmp2 to ptr + store <4 x double> %tmp6, ptr %tmp7, align 8 %tmp8 = add i64 %tmp, 4 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -2482,7 +2480,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_fmin_v8f64(double* %arg) { +define void @bcast_unfold_fmin_v8f64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_fmin_v8f64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -2503,13 +2501,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds double, double* %arg, i64 %tmp - %tmp3 = bitcast double* %tmp2 to <8 x double>* - %tmp4 = load <8 x double>, <8 x double>* %tmp3, align 8 + %tmp2 = getelementptr inbounds double, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <8 x double>, ptr %tmp3, align 8 %tmp5 = fcmp olt <8 x double> %tmp4, %tmp6 = select <8 x i1> %tmp5, <8 x double> %tmp4, <8 x double> - %tmp7 = bitcast double* %tmp2 to <8 x double>* - store <8 x double> %tmp6, <8 x double>* %tmp7, align 8 + %tmp7 = bitcast ptr %tmp2 to ptr + store <8 x double> %tmp6, ptr %tmp7, align 8 %tmp8 = add i64 %tmp, 8 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -2518,7 +2516,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_smin_v4i32(i32* %arg) { +define void @bcast_unfold_smin_v4i32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_smin_v4i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -2537,13 +2535,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i32, i32* %arg, i64 %tmp - %tmp3 = bitcast i32* %tmp2 to <4 x i32>* - %tmp4 = load <4 x i32>, <4 x i32>* %tmp3, align 4 + %tmp2 = getelementptr inbounds i32, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <4 x i32>, ptr %tmp3, align 4 %tmp5 = icmp slt <4 x i32> %tmp4, %tmp6 = select 
<4 x i1> %tmp5, <4 x i32> %tmp4, <4 x i32> - %tmp7 = bitcast i32* %tmp2 to <4 x i32>* - store <4 x i32> %tmp6, <4 x i32>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <4 x i32> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 4 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -2552,7 +2550,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_smin_v8i32(i32* %arg) { +define void @bcast_unfold_smin_v8i32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_smin_v8i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -2572,13 +2570,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i32, i32* %arg, i64 %tmp - %tmp3 = bitcast i32* %tmp2 to <8 x i32>* - %tmp4 = load <8 x i32>, <8 x i32>* %tmp3, align 4 + %tmp2 = getelementptr inbounds i32, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <8 x i32>, ptr %tmp3, align 4 %tmp5 = icmp slt <8 x i32> %tmp4, %tmp6 = select <8 x i1> %tmp5, <8 x i32> %tmp4, <8 x i32> - %tmp7 = bitcast i32* %tmp2 to <8 x i32>* - store <8 x i32> %tmp6, <8 x i32>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <8 x i32> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 8 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -2587,7 +2585,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_smin_v16i32(i32* %arg) { +define void @bcast_unfold_smin_v16i32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_smin_v16i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -2607,13 +2605,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i32, i32* %arg, i64 %tmp - %tmp3 = bitcast i32* %tmp2 to <16 x i32>* - %tmp4 = load <16 x i32>, <16 x i32>* %tmp3, align 4 + %tmp2 = getelementptr inbounds i32, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <16 x i32>, ptr %tmp3, align 4 %tmp5 = icmp slt <16 x i32> %tmp4, %tmp6 = select <16 x i1> %tmp5, <16 x i32> %tmp4, <16 x i32> - %tmp7 = bitcast i32* %tmp2 to <16 x i32>* - store <16 x i32> %tmp6, <16 x i32>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <16 x i32> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 16 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -2622,7 +2620,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_smin_v2i64(i64* %arg) { +define void @bcast_unfold_smin_v2i64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_smin_v2i64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -2641,13 +2639,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i64, i64* %arg, i64 %tmp - %tmp3 = bitcast i64* %tmp2 to <2 x i64>* - %tmp4 = load <2 x i64>, <2 x i64>* %tmp3, align 8 + %tmp2 = getelementptr inbounds i64, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <2 x i64>, ptr %tmp3, align 8 %tmp5 = icmp slt <2 x i64> %tmp4, %tmp6 = select <2 x i1> %tmp5, <2 x i64> %tmp4, <2 x i64> - %tmp7 = bitcast i64* %tmp2 to <2 x i64>* - store <2 x i64> %tmp6, <2 x i64>* %tmp7, align 8 + %tmp7 = bitcast ptr %tmp2 to ptr + store <2 x i64> %tmp6, ptr %tmp7, align 8 %tmp8 = add i64 %tmp, 2 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -2656,7 +2654,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_smin_v4i64(i64* %arg) { +define void @bcast_unfold_smin_v4i64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_smin_v4i64: ; 
CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -2676,13 +2674,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i64, i64* %arg, i64 %tmp - %tmp3 = bitcast i64* %tmp2 to <4 x i64>* - %tmp4 = load <4 x i64>, <4 x i64>* %tmp3, align 8 + %tmp2 = getelementptr inbounds i64, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <4 x i64>, ptr %tmp3, align 8 %tmp5 = icmp slt <4 x i64> %tmp4, %tmp6 = select <4 x i1> %tmp5, <4 x i64> %tmp4, <4 x i64> - %tmp7 = bitcast i64* %tmp2 to <4 x i64>* - store <4 x i64> %tmp6, <4 x i64>* %tmp7, align 8 + %tmp7 = bitcast ptr %tmp2 to ptr + store <4 x i64> %tmp6, ptr %tmp7, align 8 %tmp8 = add i64 %tmp, 4 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -2691,7 +2689,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_smin_v8i64(i64* %arg) { +define void @bcast_unfold_smin_v8i64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_smin_v8i64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -2711,13 +2709,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i64, i64* %arg, i64 %tmp - %tmp3 = bitcast i64* %tmp2 to <8 x i64>* - %tmp4 = load <8 x i64>, <8 x i64>* %tmp3, align 8 + %tmp2 = getelementptr inbounds i64, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <8 x i64>, ptr %tmp3, align 8 %tmp5 = icmp slt <8 x i64> %tmp4, %tmp6 = select <8 x i1> %tmp5, <8 x i64> %tmp4, <8 x i64> - %tmp7 = bitcast i64* %tmp2 to <8 x i64>* - store <8 x i64> %tmp6, <8 x i64>* %tmp7, align 8 + %tmp7 = bitcast ptr %tmp2 to ptr + store <8 x i64> %tmp6, ptr %tmp7, align 8 %tmp8 = add i64 %tmp, 8 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -2726,7 +2724,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_smax_v4i32(i32* %arg) { +define void @bcast_unfold_smax_v4i32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_smax_v4i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -2745,13 +2743,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i32, i32* %arg, i64 %tmp - %tmp3 = bitcast i32* %tmp2 to <4 x i32>* - %tmp4 = load <4 x i32>, <4 x i32>* %tmp3, align 4 + %tmp2 = getelementptr inbounds i32, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <4 x i32>, ptr %tmp3, align 4 %tmp5 = icmp sgt <4 x i32> %tmp4, %tmp6 = select <4 x i1> %tmp5, <4 x i32> %tmp4, <4 x i32> - %tmp7 = bitcast i32* %tmp2 to <4 x i32>* - store <4 x i32> %tmp6, <4 x i32>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <4 x i32> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 4 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -2760,7 +2758,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_smax_v8i32(i32* %arg) { +define void @bcast_unfold_smax_v8i32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_smax_v8i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -2780,13 +2778,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i32, i32* %arg, i64 %tmp - %tmp3 = bitcast i32* %tmp2 to <8 x i32>* - %tmp4 = load <8 x i32>, <8 x i32>* %tmp3, align 4 + %tmp2 = getelementptr inbounds i32, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <8 x i32>, ptr %tmp3, align 4 %tmp5 = icmp sgt <8 x i32> %tmp4, %tmp6 = select <8 x i1> %tmp5, <8 x 
i32> %tmp4, <8 x i32> - %tmp7 = bitcast i32* %tmp2 to <8 x i32>* - store <8 x i32> %tmp6, <8 x i32>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <8 x i32> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 8 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -2795,7 +2793,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_smax_v16i32(i32* %arg) { +define void @bcast_unfold_smax_v16i32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_smax_v16i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -2815,13 +2813,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i32, i32* %arg, i64 %tmp - %tmp3 = bitcast i32* %tmp2 to <16 x i32>* - %tmp4 = load <16 x i32>, <16 x i32>* %tmp3, align 4 + %tmp2 = getelementptr inbounds i32, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <16 x i32>, ptr %tmp3, align 4 %tmp5 = icmp sgt <16 x i32> %tmp4, %tmp6 = select <16 x i1> %tmp5, <16 x i32> %tmp4, <16 x i32> - %tmp7 = bitcast i32* %tmp2 to <16 x i32>* - store <16 x i32> %tmp6, <16 x i32>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <16 x i32> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 16 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -2830,7 +2828,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_smax_v2i64(i64* %arg) { +define void @bcast_unfold_smax_v2i64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_smax_v2i64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -2849,13 +2847,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i64, i64* %arg, i64 %tmp - %tmp3 = bitcast i64* %tmp2 to <2 x i64>* - %tmp4 = load <2 x i64>, <2 x i64>* %tmp3, align 8 + %tmp2 = getelementptr inbounds i64, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <2 x i64>, ptr %tmp3, align 8 %tmp5 = icmp sgt <2 x i64> %tmp4, %tmp6 = select <2 x i1> %tmp5, <2 x i64> %tmp4, <2 x i64> - %tmp7 = bitcast i64* %tmp2 to <2 x i64>* - store <2 x i64> %tmp6, <2 x i64>* %tmp7, align 8 + %tmp7 = bitcast ptr %tmp2 to ptr + store <2 x i64> %tmp6, ptr %tmp7, align 8 %tmp8 = add i64 %tmp, 2 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -2864,7 +2862,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_smax_v4i64(i64* %arg) { +define void @bcast_unfold_smax_v4i64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_smax_v4i64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -2884,13 +2882,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i64, i64* %arg, i64 %tmp - %tmp3 = bitcast i64* %tmp2 to <4 x i64>* - %tmp4 = load <4 x i64>, <4 x i64>* %tmp3, align 8 + %tmp2 = getelementptr inbounds i64, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <4 x i64>, ptr %tmp3, align 8 %tmp5 = icmp sgt <4 x i64> %tmp4, %tmp6 = select <4 x i1> %tmp5, <4 x i64> %tmp4, <4 x i64> - %tmp7 = bitcast i64* %tmp2 to <4 x i64>* - store <4 x i64> %tmp6, <4 x i64>* %tmp7, align 8 + %tmp7 = bitcast ptr %tmp2 to ptr + store <4 x i64> %tmp6, ptr %tmp7, align 8 %tmp8 = add i64 %tmp, 4 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -2899,7 +2897,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_smax_v8i64(i64* %arg) { +define void @bcast_unfold_smax_v8i64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_smax_v8i64: ; CHECK: # %bb.0: # %bb ; 
CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -2919,13 +2917,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i64, i64* %arg, i64 %tmp - %tmp3 = bitcast i64* %tmp2 to <8 x i64>* - %tmp4 = load <8 x i64>, <8 x i64>* %tmp3, align 8 + %tmp2 = getelementptr inbounds i64, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <8 x i64>, ptr %tmp3, align 8 %tmp5 = icmp sgt <8 x i64> %tmp4, %tmp6 = select <8 x i1> %tmp5, <8 x i64> %tmp4, <8 x i64> - %tmp7 = bitcast i64* %tmp2 to <8 x i64>* - store <8 x i64> %tmp6, <8 x i64>* %tmp7, align 8 + %tmp7 = bitcast ptr %tmp2 to ptr + store <8 x i64> %tmp6, ptr %tmp7, align 8 %tmp8 = add i64 %tmp, 8 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -2934,7 +2932,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_umin_v4i32(i32* %arg) { +define void @bcast_unfold_umin_v4i32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_umin_v4i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -2953,13 +2951,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i32, i32* %arg, i64 %tmp - %tmp3 = bitcast i32* %tmp2 to <4 x i32>* - %tmp4 = load <4 x i32>, <4 x i32>* %tmp3, align 4 + %tmp2 = getelementptr inbounds i32, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <4 x i32>, ptr %tmp3, align 4 %tmp5 = icmp ult <4 x i32> %tmp4, %tmp6 = select <4 x i1> %tmp5, <4 x i32> %tmp4, <4 x i32> - %tmp7 = bitcast i32* %tmp2 to <4 x i32>* - store <4 x i32> %tmp6, <4 x i32>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <4 x i32> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 4 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -2968,7 +2966,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_umin_v8i32(i32* %arg) { +define void @bcast_unfold_umin_v8i32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_umin_v8i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -2988,13 +2986,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i32, i32* %arg, i64 %tmp - %tmp3 = bitcast i32* %tmp2 to <8 x i32>* - %tmp4 = load <8 x i32>, <8 x i32>* %tmp3, align 4 + %tmp2 = getelementptr inbounds i32, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <8 x i32>, ptr %tmp3, align 4 %tmp5 = icmp ult <8 x i32> %tmp4, %tmp6 = select <8 x i1> %tmp5, <8 x i32> %tmp4, <8 x i32> - %tmp7 = bitcast i32* %tmp2 to <8 x i32>* - store <8 x i32> %tmp6, <8 x i32>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <8 x i32> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 8 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -3003,7 +3001,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_umin_v16i32(i32* %arg) { +define void @bcast_unfold_umin_v16i32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_umin_v16i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -3023,13 +3021,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i32, i32* %arg, i64 %tmp - %tmp3 = bitcast i32* %tmp2 to <16 x i32>* - %tmp4 = load <16 x i32>, <16 x i32>* %tmp3, align 4 + %tmp2 = getelementptr inbounds i32, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <16 x i32>, ptr %tmp3, align 4 %tmp5 = icmp ult <16 x i32> %tmp4, %tmp6 = select <16 x i1> %tmp5, <16 x i32> %tmp4, 
<16 x i32> - %tmp7 = bitcast i32* %tmp2 to <16 x i32>* - store <16 x i32> %tmp6, <16 x i32>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <16 x i32> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 16 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -3038,7 +3036,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_umin_v2i64(i64* %arg) { +define void @bcast_unfold_umin_v2i64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_umin_v2i64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -3057,13 +3055,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i64, i64* %arg, i64 %tmp - %tmp3 = bitcast i64* %tmp2 to <2 x i64>* - %tmp4 = load <2 x i64>, <2 x i64>* %tmp3, align 8 + %tmp2 = getelementptr inbounds i64, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <2 x i64>, ptr %tmp3, align 8 %tmp5 = icmp ult <2 x i64> %tmp4, %tmp6 = select <2 x i1> %tmp5, <2 x i64> %tmp4, <2 x i64> - %tmp7 = bitcast i64* %tmp2 to <2 x i64>* - store <2 x i64> %tmp6, <2 x i64>* %tmp7, align 8 + %tmp7 = bitcast ptr %tmp2 to ptr + store <2 x i64> %tmp6, ptr %tmp7, align 8 %tmp8 = add i64 %tmp, 2 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -3072,7 +3070,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_umin_v4i64(i64* %arg) { +define void @bcast_unfold_umin_v4i64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_umin_v4i64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -3092,13 +3090,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i64, i64* %arg, i64 %tmp - %tmp3 = bitcast i64* %tmp2 to <4 x i64>* - %tmp4 = load <4 x i64>, <4 x i64>* %tmp3, align 8 + %tmp2 = getelementptr inbounds i64, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <4 x i64>, ptr %tmp3, align 8 %tmp5 = icmp ult <4 x i64> %tmp4, %tmp6 = select <4 x i1> %tmp5, <4 x i64> %tmp4, <4 x i64> - %tmp7 = bitcast i64* %tmp2 to <4 x i64>* - store <4 x i64> %tmp6, <4 x i64>* %tmp7, align 8 + %tmp7 = bitcast ptr %tmp2 to ptr + store <4 x i64> %tmp6, ptr %tmp7, align 8 %tmp8 = add i64 %tmp, 4 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -3107,7 +3105,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_umin_v8i64(i64* %arg) { +define void @bcast_unfold_umin_v8i64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_umin_v8i64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -3127,13 +3125,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i64, i64* %arg, i64 %tmp - %tmp3 = bitcast i64* %tmp2 to <8 x i64>* - %tmp4 = load <8 x i64>, <8 x i64>* %tmp3, align 8 + %tmp2 = getelementptr inbounds i64, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <8 x i64>, ptr %tmp3, align 8 %tmp5 = icmp ult <8 x i64> %tmp4, %tmp6 = select <8 x i1> %tmp5, <8 x i64> %tmp4, <8 x i64> - %tmp7 = bitcast i64* %tmp2 to <8 x i64>* - store <8 x i64> %tmp6, <8 x i64>* %tmp7, align 8 + %tmp7 = bitcast ptr %tmp2 to ptr + store <8 x i64> %tmp6, ptr %tmp7, align 8 %tmp8 = add i64 %tmp, 8 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -3142,7 +3140,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_umax_v4i32(i32* %arg) { +define void @bcast_unfold_umax_v4i32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_umax_v4i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq 
$-4096, %rax # imm = 0xF000 @@ -3161,13 +3159,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i32, i32* %arg, i64 %tmp - %tmp3 = bitcast i32* %tmp2 to <4 x i32>* - %tmp4 = load <4 x i32>, <4 x i32>* %tmp3, align 4 + %tmp2 = getelementptr inbounds i32, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <4 x i32>, ptr %tmp3, align 4 %tmp5 = icmp ugt <4 x i32> %tmp4, %tmp6 = select <4 x i1> %tmp5, <4 x i32> %tmp4, <4 x i32> - %tmp7 = bitcast i32* %tmp2 to <4 x i32>* - store <4 x i32> %tmp6, <4 x i32>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <4 x i32> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 4 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -3176,7 +3174,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_umax_v8i32(i32* %arg) { +define void @bcast_unfold_umax_v8i32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_umax_v8i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -3196,13 +3194,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i32, i32* %arg, i64 %tmp - %tmp3 = bitcast i32* %tmp2 to <8 x i32>* - %tmp4 = load <8 x i32>, <8 x i32>* %tmp3, align 4 + %tmp2 = getelementptr inbounds i32, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <8 x i32>, ptr %tmp3, align 4 %tmp5 = icmp ugt <8 x i32> %tmp4, %tmp6 = select <8 x i1> %tmp5, <8 x i32> %tmp4, <8 x i32> - %tmp7 = bitcast i32* %tmp2 to <8 x i32>* - store <8 x i32> %tmp6, <8 x i32>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <8 x i32> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 8 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -3211,7 +3209,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_umax_v16i32(i32* %arg) { +define void @bcast_unfold_umax_v16i32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_umax_v16i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -3231,13 +3229,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i32, i32* %arg, i64 %tmp - %tmp3 = bitcast i32* %tmp2 to <16 x i32>* - %tmp4 = load <16 x i32>, <16 x i32>* %tmp3, align 4 + %tmp2 = getelementptr inbounds i32, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <16 x i32>, ptr %tmp3, align 4 %tmp5 = icmp ugt <16 x i32> %tmp4, %tmp6 = select <16 x i1> %tmp5, <16 x i32> %tmp4, <16 x i32> - %tmp7 = bitcast i32* %tmp2 to <16 x i32>* - store <16 x i32> %tmp6, <16 x i32>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <16 x i32> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 16 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -3246,7 +3244,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_umax_v2i64(i64* %arg) { +define void @bcast_unfold_umax_v2i64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_umax_v2i64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -3265,13 +3263,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i64, i64* %arg, i64 %tmp - %tmp3 = bitcast i64* %tmp2 to <2 x i64>* - %tmp4 = load <2 x i64>, <2 x i64>* %tmp3, align 8 + %tmp2 = getelementptr inbounds i64, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <2 x i64>, ptr %tmp3, align 8 %tmp5 = icmp ugt <2 x i64> %tmp4, %tmp6 = select <2 x i1> %tmp5, <2 x i64> %tmp4, <2 x i64> - 
%tmp7 = bitcast i64* %tmp2 to <2 x i64>* - store <2 x i64> %tmp6, <2 x i64>* %tmp7, align 8 + %tmp7 = bitcast ptr %tmp2 to ptr + store <2 x i64> %tmp6, ptr %tmp7, align 8 %tmp8 = add i64 %tmp, 2 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -3280,7 +3278,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_umax_v4i64(i64* %arg) { +define void @bcast_unfold_umax_v4i64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_umax_v4i64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -3300,13 +3298,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i64, i64* %arg, i64 %tmp - %tmp3 = bitcast i64* %tmp2 to <4 x i64>* - %tmp4 = load <4 x i64>, <4 x i64>* %tmp3, align 8 + %tmp2 = getelementptr inbounds i64, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <4 x i64>, ptr %tmp3, align 8 %tmp5 = icmp ugt <4 x i64> %tmp4, %tmp6 = select <4 x i1> %tmp5, <4 x i64> %tmp4, <4 x i64> - %tmp7 = bitcast i64* %tmp2 to <4 x i64>* - store <4 x i64> %tmp6, <4 x i64>* %tmp7, align 8 + %tmp7 = bitcast ptr %tmp2 to ptr + store <4 x i64> %tmp6, ptr %tmp7, align 8 %tmp8 = add i64 %tmp, 4 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -3315,7 +3313,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_umax_v8i64(i64* %arg) { +define void @bcast_unfold_umax_v8i64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_umax_v8i64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -3335,13 +3333,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i64, i64* %arg, i64 %tmp - %tmp3 = bitcast i64* %tmp2 to <8 x i64>* - %tmp4 = load <8 x i64>, <8 x i64>* %tmp3, align 8 + %tmp2 = getelementptr inbounds i64, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <8 x i64>, ptr %tmp3, align 8 %tmp5 = icmp ugt <8 x i64> %tmp4, %tmp6 = select <8 x i1> %tmp5, <8 x i64> %tmp4, <8 x i64> - %tmp7 = bitcast i64* %tmp2 to <8 x i64>* - store <8 x i64> %tmp6, <8 x i64>* %tmp7, align 8 + %tmp7 = bitcast ptr %tmp2 to ptr + store <8 x i64> %tmp6, ptr %tmp7, align 8 %tmp8 = add i64 %tmp, 8 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -3350,7 +3348,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_pcmpgt_v4i32(i32* %arg) { +define void @bcast_unfold_pcmpgt_v4i32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_pcmpgt_v4i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -3371,13 +3369,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i32, i32* %arg, i64 %tmp - %tmp3 = bitcast i32* %tmp2 to <4 x i32>* - %tmp4 = load <4 x i32>, <4 x i32>* %tmp3, align 4 + %tmp2 = getelementptr inbounds i32, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <4 x i32>, ptr %tmp3, align 4 %tmp5 = icmp sgt <4 x i32> %tmp4, %tmp6 = select <4 x i1> %tmp5, <4 x i32> , <4 x i32> %tmp4 - %tmp7 = bitcast i32* %tmp2 to <4 x i32>* - store <4 x i32> %tmp6, <4 x i32>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <4 x i32> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 4 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -3386,7 +3384,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_pcmpgt_v8i32(i32* %arg) { +define void @bcast_unfold_pcmpgt_v8i32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_pcmpgt_v8i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, 
%rax # imm = 0xF000 @@ -3408,13 +3406,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i32, i32* %arg, i64 %tmp - %tmp3 = bitcast i32* %tmp2 to <8 x i32>* - %tmp4 = load <8 x i32>, <8 x i32>* %tmp3, align 4 + %tmp2 = getelementptr inbounds i32, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <8 x i32>, ptr %tmp3, align 4 %tmp5 = icmp sgt <8 x i32> %tmp4, %tmp6 = select <8 x i1> %tmp5, <8 x i32> , <8 x i32> %tmp4 - %tmp7 = bitcast i32* %tmp2 to <8 x i32>* - store <8 x i32> %tmp6, <8 x i32>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <8 x i32> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 8 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -3423,7 +3421,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_pcmpgt_v16i32(i32* %arg) { +define void @bcast_unfold_pcmpgt_v16i32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_pcmpgt_v16i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -3445,13 +3443,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i32, i32* %arg, i64 %tmp - %tmp3 = bitcast i32* %tmp2 to <16 x i32>* - %tmp4 = load <16 x i32>, <16 x i32>* %tmp3, align 4 + %tmp2 = getelementptr inbounds i32, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <16 x i32>, ptr %tmp3, align 4 %tmp5 = icmp sgt <16 x i32> %tmp4, %tmp6 = select <16 x i1> %tmp5, <16 x i32> , <16 x i32> %tmp4 - %tmp7 = bitcast i32* %tmp2 to <16 x i32>* - store <16 x i32> %tmp6, <16 x i32>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <16 x i32> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 16 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -3460,7 +3458,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_pcmpgt_v2i64(i64* %arg) { +define void @bcast_unfold_pcmpgt_v2i64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_pcmpgt_v2i64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -3481,13 +3479,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i64, i64* %arg, i64 %tmp - %tmp3 = bitcast i64* %tmp2 to <2 x i64>* - %tmp4 = load <2 x i64>, <2 x i64>* %tmp3, align 4 + %tmp2 = getelementptr inbounds i64, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <2 x i64>, ptr %tmp3, align 4 %tmp5 = icmp sgt <2 x i64> %tmp4, %tmp6 = select <2 x i1> %tmp5, <2 x i64> , <2 x i64> %tmp4 - %tmp7 = bitcast i64* %tmp2 to <2 x i64>* - store <2 x i64> %tmp6, <2 x i64>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <2 x i64> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 2 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -3495,7 +3493,7 @@ bb1: ; preds = %bb1, %bb bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_pcmpgt_v4i64(i64* %arg) { +define void @bcast_unfold_pcmpgt_v4i64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_pcmpgt_v4i64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -3517,13 +3515,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i64, i64* %arg, i64 %tmp - %tmp3 = bitcast i64* %tmp2 to <4 x i64>* - %tmp4 = load <4 x i64>, <4 x i64>* %tmp3, align 4 + %tmp2 = getelementptr inbounds i64, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <4 x i64>, ptr %tmp3, align 4 %tmp5 = icmp sgt <4 x i64> %tmp4, %tmp6 = select <4 x 
i1> %tmp5, <4 x i64> , <4 x i64> %tmp4 - %tmp7 = bitcast i64* %tmp2 to <4 x i64>* - store <4 x i64> %tmp6, <4 x i64>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <4 x i64> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 4 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -3532,7 +3530,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_pcmpgt_v8i64(i64* %arg) { +define void @bcast_unfold_pcmpgt_v8i64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_pcmpgt_v8i64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -3554,13 +3552,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i64, i64* %arg, i64 %tmp - %tmp3 = bitcast i64* %tmp2 to <8 x i64>* - %tmp4 = load <8 x i64>, <8 x i64>* %tmp3, align 4 + %tmp2 = getelementptr inbounds i64, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <8 x i64>, ptr %tmp3, align 4 %tmp5 = icmp sgt <8 x i64> %tmp4, %tmp6 = select <8 x i1> %tmp5, <8 x i64> , <8 x i64> %tmp4 - %tmp7 = bitcast i64* %tmp2 to <8 x i64>* - store <8 x i64> %tmp6, <8 x i64>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <8 x i64> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 8 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -3569,7 +3567,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_pcmpeq_v4i32(i32* %arg) { +define void @bcast_unfold_pcmpeq_v4i32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_pcmpeq_v4i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -3590,13 +3588,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i32, i32* %arg, i64 %tmp - %tmp3 = bitcast i32* %tmp2 to <4 x i32>* - %tmp4 = load <4 x i32>, <4 x i32>* %tmp3, align 4 + %tmp2 = getelementptr inbounds i32, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <4 x i32>, ptr %tmp3, align 4 %tmp5 = icmp eq <4 x i32> %tmp4, %tmp6 = select <4 x i1> %tmp5, <4 x i32> , <4 x i32> %tmp4 - %tmp7 = bitcast i32* %tmp2 to <4 x i32>* - store <4 x i32> %tmp6, <4 x i32>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <4 x i32> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 4 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -3605,7 +3603,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_pcmpeq_v8i32(i32* %arg) { +define void @bcast_unfold_pcmpeq_v8i32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_pcmpeq_v8i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -3627,13 +3625,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i32, i32* %arg, i64 %tmp - %tmp3 = bitcast i32* %tmp2 to <8 x i32>* - %tmp4 = load <8 x i32>, <8 x i32>* %tmp3, align 4 + %tmp2 = getelementptr inbounds i32, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <8 x i32>, ptr %tmp3, align 4 %tmp5 = icmp eq <8 x i32> %tmp4, %tmp6 = select <8 x i1> %tmp5, <8 x i32> , <8 x i32> %tmp4 - %tmp7 = bitcast i32* %tmp2 to <8 x i32>* - store <8 x i32> %tmp6, <8 x i32>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <8 x i32> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 8 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -3642,7 +3640,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_pcmpeq_v16i32(i32* %arg) { +define void @bcast_unfold_pcmpeq_v16i32(ptr %arg) { ; CHECK-LABEL: 
bcast_unfold_pcmpeq_v16i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -3664,13 +3662,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i32, i32* %arg, i64 %tmp - %tmp3 = bitcast i32* %tmp2 to <16 x i32>* - %tmp4 = load <16 x i32>, <16 x i32>* %tmp3, align 4 + %tmp2 = getelementptr inbounds i32, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <16 x i32>, ptr %tmp3, align 4 %tmp5 = icmp eq <16 x i32> %tmp4, %tmp6 = select <16 x i1> %tmp5, <16 x i32> , <16 x i32> %tmp4 - %tmp7 = bitcast i32* %tmp2 to <16 x i32>* - store <16 x i32> %tmp6, <16 x i32>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <16 x i32> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 16 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -3679,7 +3677,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_pcmpeq_v2i64(i64* %arg) { +define void @bcast_unfold_pcmpeq_v2i64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_pcmpeq_v2i64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -3700,13 +3698,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i64, i64* %arg, i64 %tmp - %tmp3 = bitcast i64* %tmp2 to <2 x i64>* - %tmp4 = load <2 x i64>, <2 x i64>* %tmp3, align 4 + %tmp2 = getelementptr inbounds i64, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <2 x i64>, ptr %tmp3, align 4 %tmp5 = icmp eq <2 x i64> %tmp4, %tmp6 = select <2 x i1> %tmp5, <2 x i64> , <2 x i64> %tmp4 - %tmp7 = bitcast i64* %tmp2 to <2 x i64>* - store <2 x i64> %tmp6, <2 x i64>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <2 x i64> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 2 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -3714,7 +3712,7 @@ bb1: ; preds = %bb1, %bb bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_pcmpeq_v4i64(i64* %arg) { +define void @bcast_unfold_pcmpeq_v4i64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_pcmpeq_v4i64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -3736,13 +3734,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i64, i64* %arg, i64 %tmp - %tmp3 = bitcast i64* %tmp2 to <4 x i64>* - %tmp4 = load <4 x i64>, <4 x i64>* %tmp3, align 4 + %tmp2 = getelementptr inbounds i64, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <4 x i64>, ptr %tmp3, align 4 %tmp5 = icmp eq <4 x i64> %tmp4, %tmp6 = select <4 x i1> %tmp5, <4 x i64> , <4 x i64> %tmp4 - %tmp7 = bitcast i64* %tmp2 to <4 x i64>* - store <4 x i64> %tmp6, <4 x i64>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <4 x i64> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 4 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -3751,7 +3749,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_pcmpeq_v8i64(i64* %arg) { +define void @bcast_unfold_pcmpeq_v8i64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_pcmpeq_v8i64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -3773,13 +3771,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i64, i64* %arg, i64 %tmp - %tmp3 = bitcast i64* %tmp2 to <8 x i64>* - %tmp4 = load <8 x i64>, <8 x i64>* %tmp3, align 4 + %tmp2 = getelementptr inbounds i64, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <8 x i64>, 
ptr %tmp3, align 4 %tmp5 = icmp eq <8 x i64> %tmp4, %tmp6 = select <8 x i1> %tmp5, <8 x i64> , <8 x i64> %tmp4 - %tmp7 = bitcast i64* %tmp2 to <8 x i64>* - store <8 x i64> %tmp6, <8 x i64>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <8 x i64> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 8 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -3788,7 +3786,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_pcmp_v4i32(i32* %arg) { +define void @bcast_unfold_pcmp_v4i32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_pcmp_v4i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: xorl %eax, %eax @@ -3810,13 +3808,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i32, i32* %arg, i64 %tmp - %tmp3 = bitcast i32* %tmp2 to <4 x i32>* - %tmp4 = load <4 x i32>, <4 x i32>* %tmp3, align 4 + %tmp2 = getelementptr inbounds i32, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <4 x i32>, ptr %tmp3, align 4 %tmp5 = icmp slt <4 x i32> %tmp4, %tmp6 = select <4 x i1> %tmp5, <4 x i32> , <4 x i32> %tmp4 - %tmp7 = bitcast i32* %tmp2 to <4 x i32>* - store <4 x i32> %tmp6, <4 x i32>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <4 x i32> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 4 %tmp9 = icmp slt i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -3825,7 +3823,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_pcmp_v8i32(i32* %arg) { +define void @bcast_unfold_pcmp_v8i32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_pcmp_v8i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: xorl %eax, %eax @@ -3848,13 +3846,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i32, i32* %arg, i64 %tmp - %tmp3 = bitcast i32* %tmp2 to <8 x i32>* - %tmp4 = load <8 x i32>, <8 x i32>* %tmp3, align 4 + %tmp2 = getelementptr inbounds i32, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <8 x i32>, ptr %tmp3, align 4 %tmp5 = icmp slt <8 x i32> %tmp4, %tmp6 = select <8 x i1> %tmp5, <8 x i32> , <8 x i32> %tmp4 - %tmp7 = bitcast i32* %tmp2 to <8 x i32>* - store <8 x i32> %tmp6, <8 x i32>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <8 x i32> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 8 %tmp9 = icmp slt i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -3863,7 +3861,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_pcmp_v16i32(i32* %arg) { +define void @bcast_unfold_pcmp_v16i32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_pcmp_v16i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: xorl %eax, %eax @@ -3886,13 +3884,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i32, i32* %arg, i64 %tmp - %tmp3 = bitcast i32* %tmp2 to <16 x i32>* - %tmp4 = load <16 x i32>, <16 x i32>* %tmp3, align 4 + %tmp2 = getelementptr inbounds i32, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <16 x i32>, ptr %tmp3, align 4 %tmp5 = icmp slt <16 x i32> %tmp4, %tmp6 = select <16 x i1> %tmp5, <16 x i32> , <16 x i32> %tmp4 - %tmp7 = bitcast i32* %tmp2 to <16 x i32>* - store <16 x i32> %tmp6, <16 x i32>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <16 x i32> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 16 %tmp9 = icmp slt i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -3901,7 +3899,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_pcmp_v2i64(i64* %arg) { +define void @bcast_unfold_pcmp_v2i64(ptr %arg) { ; CHECK-LABEL: 
bcast_unfold_pcmp_v2i64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: xorl %eax, %eax @@ -3923,13 +3921,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i64, i64* %arg, i64 %tmp - %tmp3 = bitcast i64* %tmp2 to <2 x i64>* - %tmp4 = load <2 x i64>, <2 x i64>* %tmp3, align 4 + %tmp2 = getelementptr inbounds i64, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <2 x i64>, ptr %tmp3, align 4 %tmp5 = icmp slt <2 x i64> %tmp4, %tmp6 = select <2 x i1> %tmp5, <2 x i64> , <2 x i64> %tmp4 - %tmp7 = bitcast i64* %tmp2 to <2 x i64>* - store <2 x i64> %tmp6, <2 x i64>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <2 x i64> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 2 %tmp9 = icmp slt i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -3937,7 +3935,7 @@ bb1: ; preds = %bb1, %bb bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_pcmp_v4i64(i64* %arg) { +define void @bcast_unfold_pcmp_v4i64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_pcmp_v4i64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: xorl %eax, %eax @@ -3960,13 +3958,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i64, i64* %arg, i64 %tmp - %tmp3 = bitcast i64* %tmp2 to <4 x i64>* - %tmp4 = load <4 x i64>, <4 x i64>* %tmp3, align 4 + %tmp2 = getelementptr inbounds i64, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <4 x i64>, ptr %tmp3, align 4 %tmp5 = icmp slt <4 x i64> %tmp4, %tmp6 = select <4 x i1> %tmp5, <4 x i64> , <4 x i64> %tmp4 - %tmp7 = bitcast i64* %tmp2 to <4 x i64>* - store <4 x i64> %tmp6, <4 x i64>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <4 x i64> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 4 %tmp9 = icmp slt i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -3975,7 +3973,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_pcmp_v8i64(i64* %arg) { +define void @bcast_unfold_pcmp_v8i64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_pcmp_v8i64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: xorl %eax, %eax @@ -3998,13 +3996,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i64, i64* %arg, i64 %tmp - %tmp3 = bitcast i64* %tmp2 to <8 x i64>* - %tmp4 = load <8 x i64>, <8 x i64>* %tmp3, align 4 + %tmp2 = getelementptr inbounds i64, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <8 x i64>, ptr %tmp3, align 4 %tmp5 = icmp slt <8 x i64> %tmp4, %tmp6 = select <8 x i1> %tmp5, <8 x i64> , <8 x i64> %tmp4 - %tmp7 = bitcast i64* %tmp2 to <8 x i64>* - store <8 x i64> %tmp6, <8 x i64>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <8 x i64> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 8 %tmp9 = icmp slt i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -4013,7 +4011,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_pcmpu_v4i32(i32* %arg) { +define void @bcast_unfold_pcmpu_v4i32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_pcmpu_v4i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: xorl %eax, %eax @@ -4035,13 +4033,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i32, i32* %arg, i64 %tmp - %tmp3 = bitcast i32* %tmp2 to <4 x i32>* - %tmp4 = load <4 x i32>, <4 x i32>* %tmp3, align 4 + %tmp2 = getelementptr inbounds i32, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <4 x i32>, ptr %tmp3, align 4 %tmp5 = icmp ult <4 x i32> %tmp4, %tmp6 = select <4 x i1> %tmp5, <4 x i32> , 
<4 x i32> %tmp4 - %tmp7 = bitcast i32* %tmp2 to <4 x i32>* - store <4 x i32> %tmp6, <4 x i32>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <4 x i32> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 4 %tmp9 = icmp ult i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -4050,7 +4048,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_pcmpu_v8i32(i32* %arg) { +define void @bcast_unfold_pcmpu_v8i32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_pcmpu_v8i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: xorl %eax, %eax @@ -4073,13 +4071,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i32, i32* %arg, i64 %tmp - %tmp3 = bitcast i32* %tmp2 to <8 x i32>* - %tmp4 = load <8 x i32>, <8 x i32>* %tmp3, align 4 + %tmp2 = getelementptr inbounds i32, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <8 x i32>, ptr %tmp3, align 4 %tmp5 = icmp ult <8 x i32> %tmp4, %tmp6 = select <8 x i1> %tmp5, <8 x i32> , <8 x i32> %tmp4 - %tmp7 = bitcast i32* %tmp2 to <8 x i32>* - store <8 x i32> %tmp6, <8 x i32>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <8 x i32> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 8 %tmp9 = icmp ult i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -4088,7 +4086,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_pcmpu_v16i32(i32* %arg) { +define void @bcast_unfold_pcmpu_v16i32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_pcmpu_v16i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: xorl %eax, %eax @@ -4111,13 +4109,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i32, i32* %arg, i64 %tmp - %tmp3 = bitcast i32* %tmp2 to <16 x i32>* - %tmp4 = load <16 x i32>, <16 x i32>* %tmp3, align 4 + %tmp2 = getelementptr inbounds i32, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <16 x i32>, ptr %tmp3, align 4 %tmp5 = icmp ult <16 x i32> %tmp4, %tmp6 = select <16 x i1> %tmp5, <16 x i32> , <16 x i32> %tmp4 - %tmp7 = bitcast i32* %tmp2 to <16 x i32>* - store <16 x i32> %tmp6, <16 x i32>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <16 x i32> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 16 %tmp9 = icmp ult i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -4126,7 +4124,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_pcmpu_v2i64(i64* %arg) { +define void @bcast_unfold_pcmpu_v2i64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_pcmpu_v2i64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: xorl %eax, %eax @@ -4148,13 +4146,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i64, i64* %arg, i64 %tmp - %tmp3 = bitcast i64* %tmp2 to <2 x i64>* - %tmp4 = load <2 x i64>, <2 x i64>* %tmp3, align 4 + %tmp2 = getelementptr inbounds i64, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <2 x i64>, ptr %tmp3, align 4 %tmp5 = icmp ult <2 x i64> %tmp4, %tmp6 = select <2 x i1> %tmp5, <2 x i64> , <2 x i64> %tmp4 - %tmp7 = bitcast i64* %tmp2 to <2 x i64>* - store <2 x i64> %tmp6, <2 x i64>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <2 x i64> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 2 %tmp9 = icmp ult i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -4162,7 +4160,7 @@ bb1: ; preds = %bb1, %bb bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_pcmpu_v4i64(i64* %arg) { +define void @bcast_unfold_pcmpu_v4i64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_pcmpu_v4i64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: 
xorl %eax, %eax @@ -4185,13 +4183,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i64, i64* %arg, i64 %tmp - %tmp3 = bitcast i64* %tmp2 to <4 x i64>* - %tmp4 = load <4 x i64>, <4 x i64>* %tmp3, align 4 + %tmp2 = getelementptr inbounds i64, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <4 x i64>, ptr %tmp3, align 4 %tmp5 = icmp ult <4 x i64> %tmp4, %tmp6 = select <4 x i1> %tmp5, <4 x i64> , <4 x i64> %tmp4 - %tmp7 = bitcast i64* %tmp2 to <4 x i64>* - store <4 x i64> %tmp6, <4 x i64>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <4 x i64> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 4 %tmp9 = icmp ult i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -4200,7 +4198,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_pcmpu_v8i64(i64* %arg) { +define void @bcast_unfold_pcmpu_v8i64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_pcmpu_v8i64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: xorl %eax, %eax @@ -4223,13 +4221,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i64, i64* %arg, i64 %tmp - %tmp3 = bitcast i64* %tmp2 to <8 x i64>* - %tmp4 = load <8 x i64>, <8 x i64>* %tmp3, align 4 + %tmp2 = getelementptr inbounds i64, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <8 x i64>, ptr %tmp3, align 4 %tmp5 = icmp ult <8 x i64> %tmp4, %tmp6 = select <8 x i1> %tmp5, <8 x i64> , <8 x i64> %tmp4 - %tmp7 = bitcast i64* %tmp2 to <8 x i64>* - store <8 x i64> %tmp6, <8 x i64>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <8 x i64> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 8 %tmp9 = icmp ult i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -4238,7 +4236,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_cmp_v4f32(float* %arg) { +define void @bcast_unfold_cmp_v4f32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_cmp_v4f32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -4260,13 +4258,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds float, float* %arg, i64 %tmp - %tmp3 = bitcast float* %tmp2 to <4 x float>* - %tmp4 = load <4 x float>, <4 x float>* %tmp3, align 4 + %tmp2 = getelementptr inbounds float, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <4 x float>, ptr %tmp3, align 4 %tmp5 = fcmp olt <4 x float> %tmp4, %tmp6 = select <4 x i1> %tmp5, <4 x float> %tmp4, <4 x float> - %tmp7 = bitcast float* %tmp2 to <4 x float>* - store <4 x float> %tmp6, <4 x float>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <4 x float> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 4 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -4275,7 +4273,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_cmp_v8f32(float* %arg) { +define void @bcast_unfold_cmp_v8f32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_cmp_v8f32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -4298,13 +4296,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds float, float* %arg, i64 %tmp - %tmp3 = bitcast float* %tmp2 to <8 x float>* - %tmp4 = load <8 x float>, <8 x float>* %tmp3, align 4 + %tmp2 = getelementptr inbounds float, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <8 x float>, ptr %tmp3, align 4 %tmp5 = fcmp olt <8 x float> %tmp4, %tmp6 = select <8 x i1> %tmp5, <8 x float> %tmp4, 
<8 x float> - %tmp7 = bitcast float* %tmp2 to <8 x float>* - store <8 x float> %tmp6, <8 x float>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <8 x float> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 8 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -4313,7 +4311,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_cmp_v16f32(float* %arg) { +define void @bcast_unfold_cmp_v16f32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_cmp_v16f32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -4336,13 +4334,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds float, float* %arg, i64 %tmp - %tmp3 = bitcast float* %tmp2 to <16 x float>* - %tmp4 = load <16 x float>, <16 x float>* %tmp3, align 4 + %tmp2 = getelementptr inbounds float, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <16 x float>, ptr %tmp3, align 4 %tmp5 = fcmp olt <16 x float> %tmp4, %tmp6 = select <16 x i1> %tmp5, <16 x float> %tmp4, <16 x float> - %tmp7 = bitcast float* %tmp2 to <16 x float>* - store <16 x float> %tmp6, <16 x float>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <16 x float> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 16 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -4351,7 +4349,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_cmp_v2f64(double* %arg) { +define void @bcast_unfold_cmp_v2f64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_cmp_v2f64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -4375,13 +4373,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds double, double* %arg, i64 %tmp - %tmp3 = bitcast double* %tmp2 to <2 x double>* - %tmp4 = load <2 x double>, <2 x double>* %tmp3, align 8 + %tmp2 = getelementptr inbounds double, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <2 x double>, ptr %tmp3, align 8 %tmp5 = fcmp olt <2 x double> %tmp4, %tmp6 = select <2 x i1> %tmp5, <2 x double> %tmp4, <2 x double> - %tmp7 = bitcast double* %tmp2 to <2 x double>* - store <2 x double> %tmp6, <2 x double>* %tmp7, align 8 + %tmp7 = bitcast ptr %tmp2 to ptr + store <2 x double> %tmp6, ptr %tmp7, align 8 %tmp8 = add i64 %tmp, 2 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -4390,7 +4388,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_cmp_v4f64(double* %arg) { +define void @bcast_unfold_cmp_v4f64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_cmp_v4f64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -4413,13 +4411,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds double, double* %arg, i64 %tmp - %tmp3 = bitcast double* %tmp2 to <4 x double>* - %tmp4 = load <4 x double>, <4 x double>* %tmp3, align 8 + %tmp2 = getelementptr inbounds double, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <4 x double>, ptr %tmp3, align 8 %tmp5 = fcmp olt <4 x double> %tmp4, %tmp6 = select <4 x i1> %tmp5, <4 x double> %tmp4, <4 x double> - %tmp7 = bitcast double* %tmp2 to <4 x double>* - store <4 x double> %tmp6, <4 x double>* %tmp7, align 8 + %tmp7 = bitcast ptr %tmp2 to ptr + store <4 x double> %tmp6, ptr %tmp7, align 8 %tmp8 = add i64 %tmp, 4 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -4428,7 +4426,7 @@ bb10: ; preds = %bb1 ret void } -define void 
@bcast_unfold_cmp_v8f64(double* %arg) { +define void @bcast_unfold_cmp_v8f64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_cmp_v8f64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -4451,13 +4449,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds double, double* %arg, i64 %tmp - %tmp3 = bitcast double* %tmp2 to <8 x double>* - %tmp4 = load <8 x double>, <8 x double>* %tmp3, align 8 + %tmp2 = getelementptr inbounds double, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <8 x double>, ptr %tmp3, align 8 %tmp5 = fcmp olt <8 x double> %tmp4, %tmp6 = select <8 x i1> %tmp5, <8 x double> %tmp4, <8 x double> - %tmp7 = bitcast double* %tmp2 to <8 x double>* - store <8 x double> %tmp6, <8 x double>* %tmp7, align 8 + %tmp7 = bitcast ptr %tmp2 to ptr + store <8 x double> %tmp6, ptr %tmp7, align 8 %tmp8 = add i64 %tmp, 8 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -4466,7 +4464,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_cmp_v8f32_refold(float* nocapture %0) { +define void @bcast_unfold_cmp_v8f32_refold(ptr nocapture %0) { ; CHECK-LABEL: bcast_unfold_cmp_v8f32_refold: ; CHECK: # %bb.0: ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -4486,13 +4484,13 @@ define void @bcast_unfold_cmp_v8f32_refold(float* nocapture %0) { 2: ; preds = %2, %1 %3 = phi i64 [ 0, %1 ], [ %10, %2 ] - %4 = getelementptr inbounds float, float* %0, i64 %3 - %5 = bitcast float* %4 to <8 x float>* - %6 = load <8 x float>, <8 x float>* %5, align 4 + %4 = getelementptr inbounds float, ptr %0, i64 %3 + %5 = bitcast ptr %4 to ptr + %6 = load <8 x float>, ptr %5, align 4 %7 = fcmp olt <8 x float> %6, %8 = select <8 x i1> %7, <8 x float> , <8 x float> - %9 = bitcast float* %4 to <8 x float>* - store <8 x float> %8, <8 x float>* %9, align 4 + %9 = bitcast ptr %4 to ptr + store <8 x float> %8, ptr %9, align 4 %10 = add i64 %3, 8 %11 = icmp eq i64 %10, 1024 br i1 %11, label %12, label %2 @@ -4501,7 +4499,7 @@ define void @bcast_unfold_cmp_v8f32_refold(float* nocapture %0) { ret void } -define void @bcast_unfold_ptestm_v4i32(i32* %arg) { +define void @bcast_unfold_ptestm_v4i32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_ptestm_v4i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -4522,14 +4520,14 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i32, i32* %arg, i64 %tmp - %tmp3 = bitcast i32* %tmp2 to <4 x i32>* - %tmp4 = load <4 x i32>, <4 x i32>* %tmp3, align 4 + %tmp2 = getelementptr inbounds i32, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <4 x i32>, ptr %tmp3, align 4 %tmp4b = and <4 x i32> %tmp4, %tmp5 = icmp ne <4 x i32> %tmp4b, zeroinitializer %tmp6 = select <4 x i1> %tmp5, <4 x i32> , <4 x i32> %tmp4 - %tmp7 = bitcast i32* %tmp2 to <4 x i32>* - store <4 x i32> %tmp6, <4 x i32>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <4 x i32> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 4 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -4538,7 +4536,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_ptestnm_v4i32(i32* %arg) { +define void @bcast_unfold_ptestnm_v4i32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_ptestnm_v4i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -4559,14 +4557,14 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i32, i32* %arg, i64 
%tmp - %tmp3 = bitcast i32* %tmp2 to <4 x i32>* - %tmp4 = load <4 x i32>, <4 x i32>* %tmp3, align 4 + %tmp2 = getelementptr inbounds i32, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <4 x i32>, ptr %tmp3, align 4 %tmp4b = and <4 x i32> %tmp4, %tmp5 = icmp eq <4 x i32> %tmp4b, zeroinitializer %tmp6 = select <4 x i1> %tmp5, <4 x i32> , <4 x i32> %tmp4 - %tmp7 = bitcast i32* %tmp2 to <4 x i32>* - store <4 x i32> %tmp6, <4 x i32>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <4 x i32> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 4 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -4575,7 +4573,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_ptestm_v4i64(i64* %arg) { +define void @bcast_unfold_ptestm_v4i64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_ptestm_v4i64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -4597,14 +4595,14 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i64, i64* %arg, i64 %tmp - %tmp3 = bitcast i64* %tmp2 to <4 x i64>* - %tmp4 = load <4 x i64>, <4 x i64>* %tmp3, align 8 + %tmp2 = getelementptr inbounds i64, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <4 x i64>, ptr %tmp3, align 8 %tmp4b = and <4 x i64> %tmp4, %tmp5 = icmp ne <4 x i64> %tmp4b, zeroinitializer %tmp6 = select <4 x i1> %tmp5, <4 x i64> , <4 x i64> %tmp4 - %tmp7 = bitcast i64* %tmp2 to <4 x i64>* - store <4 x i64> %tmp6, <4 x i64>* %tmp7, align 8 + %tmp7 = bitcast ptr %tmp2 to ptr + store <4 x i64> %tmp6, ptr %tmp7, align 8 %tmp8 = add i64 %tmp, 4 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -4613,7 +4611,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_ptestnm_v4i64(i64* %arg) { +define void @bcast_unfold_ptestnm_v4i64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_ptestnm_v4i64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -4635,14 +4633,14 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i64, i64* %arg, i64 %tmp - %tmp3 = bitcast i64* %tmp2 to <4 x i64>* - %tmp4 = load <4 x i64>, <4 x i64>* %tmp3, align 8 + %tmp2 = getelementptr inbounds i64, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <4 x i64>, ptr %tmp3, align 8 %tmp4b = and <4 x i64> %tmp4, %tmp5 = icmp eq <4 x i64> %tmp4b, zeroinitializer %tmp6 = select <4 x i1> %tmp5, <4 x i64> , <4 x i64> %tmp4 - %tmp7 = bitcast i64* %tmp2 to <4 x i64>* - store <4 x i64> %tmp6, <4 x i64>* %tmp7, align 8 + %tmp7 = bitcast ptr %tmp2 to ptr + store <4 x i64> %tmp6, ptr %tmp7, align 8 %tmp8 = add i64 %tmp, 4 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -4654,7 +4652,7 @@ bb10: ; preds = %bb1 ; The or/and pattern here should be turned into vpternlog. The multiply is ; there to increase the use count of the loads so they can't fold. We want to ; unfold the broadcast and pull it out of the loop. 
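; Editorial sketch, not part of the original test: vpternlog evaluates an
; arbitrary three-input boolean function selected by an 8-bit immediate, so
; when the two and-masks are bitwise complements the whole (x & C) | (y & ~C)
; expression is a function of three bit inputs and fits a single instruction
; once the broadcast constant is hoisted into a register. A commented scalar
; sketch with hypothetical masks (the actual splat constants are not shown in
; this excerpt):
;   %lo = and i32 %x, 65535       ; x & C
;   %hi = and i32 %y, -65536      ; y & ~C
;   %r  = or i32 %lo, %hi         ; one vpternlog candidate when vectorized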
-define void @bcast_unfold_vpternlog_v16i32(i32* %arg, i32* %arg1) { +define void @bcast_unfold_vpternlog_v16i32(ptr %arg, ptr %arg1) { ; CHECK-LABEL: bcast_unfold_vpternlog_v16i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -4678,19 +4676,19 @@ bb: bb2: ; preds = %bb2, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp18, %bb2 ] - %tmp3 = getelementptr inbounds i32, i32* %arg, i64 %tmp - %tmp4 = bitcast i32* %tmp3 to <16 x i32>* - %tmp5 = load <16 x i32>, <16 x i32>* %tmp4, align 4 - %tmp6 = getelementptr inbounds i32, i32* %arg1, i64 %tmp - %tmp10 = bitcast i32* %tmp6 to <16 x i32>* - %tmp11 = load <16 x i32>, <16 x i32>* %tmp10, align 4 + %tmp3 = getelementptr inbounds i32, ptr %arg, i64 %tmp + %tmp4 = bitcast ptr %tmp3 to ptr + %tmp5 = load <16 x i32>, ptr %tmp4, align 4 + %tmp6 = getelementptr inbounds i32, ptr %arg1, i64 %tmp + %tmp10 = bitcast ptr %tmp6 to ptr + %tmp11 = load <16 x i32>, ptr %tmp10, align 4 %tmp12 = and <16 x i32> %tmp5, %tmp13 = and <16 x i32> %tmp11, %tmp14 = or <16 x i32> %tmp12, %tmp13 %tmp15 = mul <16 x i32> %tmp14, %tmp5 %tmp16 = mul <16 x i32> %tmp15, %tmp11 - %tmp17 = bitcast i32* %tmp3 to <16 x i32>* - store <16 x i32> %tmp16, <16 x i32>* %tmp17, align 4 + %tmp17 = bitcast ptr %tmp3 to ptr + store <16 x i32> %tmp16, ptr %tmp17, align 4 %tmp18 = add i64 %tmp, 16 %tmp19 = icmp eq i64 %tmp18, 1024 br i1 %tmp19, label %bb20, label %bb2 diff --git a/llvm/test/CodeGen/X86/block-placement.ll b/llvm/test/CodeGen/X86/block-placement.ll index b2a0344392c9b..156e9a8a1edf0 100644 --- a/llvm/test/CodeGen/X86/block-placement.ll +++ b/llvm/test/CodeGen/X86/block-placement.ll @@ -1,11 +1,9 @@ -; ifndef INTEL_SYCL_OPAQUEPOINTER_READY ; RUN: llc -mtriple=i686-linux -pre-RA-sched=source < %s | FileCheck %s ; RUN: opt -disable-output -passes=debugify < %s -; end declare void @error(i32 %i, i32 %a, i32 %b) -define i32 @test_ifchains(i32 %i, i32* %a, i32 %b) { +define i32 @test_ifchains(i32 %i, ptr %a, i32 %b) { ; Test a chain of ifs, where the block guarded by the if is error handling code ; that is not expected to run. 
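; Editorial note, not part of the original test: the !prof !0 metadata on
; these branches (defined further down as branch_weights of 1 and 64) is read
; as a taken probability of roughly 1 / (1 + 64), about 1.5%, so every %thenN
; error block is considered cold and laid out after the hot fall-through
; chain; %then5 is the last block the CHECK lines expect.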
; CHECK-LABEL: test_ifchains: @@ -27,8 +25,8 @@ define i32 @test_ifchains(i32 %i, i32* %a, i32 %b) { ; CHECK: %then5 entry: - %gep1 = getelementptr i32, i32* %a, i32 1 - %val1 = load i32, i32* %gep1 + %gep1 = getelementptr i32, ptr %a, i32 1 + %val1 = load i32, ptr %gep1 %cond1 = icmp ugt i32 %val1, 1 br i1 %cond1, label %then1, label %else1, !prof !0 @@ -37,8 +35,8 @@ then1: br label %else1 else1: - %gep2 = getelementptr i32, i32* %a, i32 2 - %val2 = load i32, i32* %gep2 + %gep2 = getelementptr i32, ptr %a, i32 2 + %val2 = load i32, ptr %gep2 %cond2 = icmp ugt i32 %val2, 2 br i1 %cond2, label %then2, label %else2, !prof !0 @@ -47,8 +45,8 @@ then2: br label %else2 else2: - %gep3 = getelementptr i32, i32* %a, i32 3 - %val3 = load i32, i32* %gep3 + %gep3 = getelementptr i32, ptr %a, i32 3 + %val3 = load i32, ptr %gep3 %cond3 = icmp ugt i32 %val3, 3 br i1 %cond3, label %then3, label %else3, !prof !0 @@ -57,8 +55,8 @@ then3: br label %else3 else3: - %gep4 = getelementptr i32, i32* %a, i32 4 - %val4 = load i32, i32* %gep4 + %gep4 = getelementptr i32, ptr %a, i32 4 + %val4 = load i32, ptr %gep4 %cond4 = icmp ugt i32 %val4, 4 br i1 %cond4, label %then4, label %else4, !prof !0 @@ -67,8 +65,8 @@ then4: br label %else4 else4: - %gep5 = getelementptr i32, i32* %a, i32 3 - %val5 = load i32, i32* %gep5 + %gep5 = getelementptr i32, ptr %a, i32 3 + %val5 = load i32, ptr %gep5 %cond5 = icmp ugt i32 %val5, 3 br i1 %cond5, label %then5, label %exit, !prof !0 @@ -80,7 +78,7 @@ exit: ret i32 %b } -define i32 @test_loop_cold_blocks(i32 %i, i32* %a) { +define i32 @test_loop_cold_blocks(i32 %i, ptr %a) { ; Check that we sink cold loop blocks after the hot loop body. ; CHECK-LABEL: test_loop_cold_blocks: ; CHECK: %entry @@ -116,8 +114,8 @@ unlikely2: br label %body3 body3: - %arrayidx = getelementptr inbounds i32, i32* %a, i32 %iv - %0 = load i32, i32* %arrayidx + %arrayidx = getelementptr inbounds i32, ptr %a, i32 %iv + %0 = load i32, ptr %arrayidx %sum = add nsw i32 %0, %base %next = add i32 %iv, 1 %exitcond = icmp eq i32 %next, %i @@ -129,7 +127,7 @@ exit: !0 = !{!"branch_weights", i32 1, i32 64} -define i32 @test_loop_early_exits(i32 %i, i32* %a) { +define i32 @test_loop_early_exits(i32 %i, ptr %a) { ; Check that we sink early exit blocks out of loop bodies. ; CHECK-LABEL: test_loop_early_exits: ; CHECK: %entry @@ -169,8 +167,8 @@ bail3: ret i32 -3 body4: - %arrayidx = getelementptr inbounds i32, i32* %a, i32 %iv - %0 = load i32, i32* %arrayidx + %arrayidx = getelementptr inbounds i32, ptr %a, i32 %iv + %0 = load i32, ptr %arrayidx %sum = add nsw i32 %0, %base %next = add i32 %iv, 1 %exitcond = icmp eq i32 %next, %i @@ -186,7 +184,7 @@ exit: ; duplicated, we add some calls to dummy. declare void @dummy() -define i32 @test_loop_rotate(i32 %i, i32* %a) { +define i32 @test_loop_rotate(i32 %i, ptr %a) { ; Check that we rotate conditional exits from the loop to the bottom of the ; loop, eliminating unconditional branches to the top. 
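; Editorial note, not part of the original test: the rotation checked here is
; a layout rotation in block placement, not the IR-level loop-rotate pass.
; With the exit test in %body0 and the unconditional backedge in %body1, an
; order such as %entry, %body1, %body0, %exit (illustrative) turns %body1's
; unconditional branch into a fall-through and leaves the conditional exit
; branch at the bottom of the loop.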
; CHECK-LABEL: test_loop_rotate: @@ -208,8 +206,8 @@ body0: br i1 %exitcond, label %exit, label %body1 body1: - %arrayidx = getelementptr inbounds i32, i32* %a, i32 %iv - %0 = load i32, i32* %arrayidx + %arrayidx = getelementptr inbounds i32, ptr %a, i32 %iv + %0 = load i32, ptr %arrayidx %sum = add nsw i32 %0, %base %bailcond1 = icmp eq i32 %sum, 42 br label %body0 @@ -218,7 +216,7 @@ exit: ret i32 %base } -define i32 @test_no_loop_rotate(i32 %i, i32* %a) { +define i32 @test_no_loop_rotate(i32 %i, ptr %a) { ; Check that we don't try to rotate a loop which is already laid out with ; fallthrough opportunities into the top and out of the bottom. ; CHECK-LABEL: test_no_loop_rotate: @@ -233,8 +231,8 @@ entry: body0: %iv = phi i32 [ 0, %entry ], [ %next, %body1 ] %base = phi i32 [ 0, %entry ], [ %sum, %body1 ] - %arrayidx = getelementptr inbounds i32, i32* %a, i32 %iv - %0 = load i32, i32* %arrayidx + %arrayidx = getelementptr inbounds i32, ptr %a, i32 %iv + %0 = load i32, ptr %arrayidx %sum = add nsw i32 %0, %base %bailcond1 = icmp eq i32 %sum, 42 br i1 %bailcond1, label %exit, label %body1 @@ -248,7 +246,7 @@ exit: ret i32 %base } -define i32 @test_loop_align(i32 %i, i32* %a) { +define i32 @test_loop_align(i32 %i, ptr %a) { ; Check that we provide basic loop body alignment with the block placement ; pass. ; CHECK-LABEL: test_loop_align: @@ -263,8 +261,8 @@ entry: body: %iv = phi i32 [ 0, %entry ], [ %next, %body ] %base = phi i32 [ 0, %entry ], [ %sum, %body ] - %arrayidx = getelementptr inbounds i32, i32* %a, i32 %iv - %0 = load i32, i32* %arrayidx + %arrayidx = getelementptr inbounds i32, ptr %a, i32 %iv + %0 = load i32, ptr %arrayidx %sum = add nsw i32 %0, %base %next = add i32 %iv, 1 %exitcond = icmp eq i32 %next, %i @@ -274,7 +272,7 @@ exit: ret i32 %sum } -define i32 @test_nested_loop_align(i32 %i, i32* %a, i32* %b) { +define i32 @test_nested_loop_align(i32 %i, ptr %a, ptr %b) { ; Check that we provide nested loop body alignment. 
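; Editorial note, illustrative only: alignment decided during block placement
; shows up in the emitted x86 assembly as a .p2align directive ahead of each
; aligned loop header, and for a nested loop the inner header is expected to
; get its own alignment, e.g. (assumed label name and alignment value):
;   .p2align 4, 0x90
;   .LBB_inner:                   ; %inner.loop.body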
; CHECK-LABEL: test_nested_loop_align: ; CHECK: %entry @@ -290,16 +288,16 @@ entry: loop.body.1: %iv = phi i32 [ 0, %entry ], [ %next, %loop.body.2 ] - %arrayidx = getelementptr inbounds i32, i32* %a, i32 %iv - %bidx = load i32, i32* %arrayidx + %arrayidx = getelementptr inbounds i32, ptr %a, i32 %iv + %bidx = load i32, ptr %arrayidx br label %inner.loop.body inner.loop.body: %inner.iv = phi i32 [ 0, %loop.body.1 ], [ %inner.next, %inner.loop.body ] %base = phi i32 [ 0, %loop.body.1 ], [ %sum, %inner.loop.body ] %scaled_idx = mul i32 %bidx, %iv - %inner.arrayidx = getelementptr inbounds i32, i32* %b, i32 %scaled_idx - %0 = load i32, i32* %inner.arrayidx + %inner.arrayidx = getelementptr inbounds i32, ptr %b, i32 %scaled_idx + %0 = load i32, ptr %inner.arrayidx %sum = add nsw i32 %0, %base %inner.next = add i32 %iv, 1 %inner.exitcond = icmp eq i32 %inner.next, %i @@ -333,13 +331,13 @@ loop.body1: br i1 undef, label %loop.body3, label %loop.body2 loop.body2: - %ptr = load i32*, i32** undef, align 4 + %ptr = load ptr, ptr undef, align 4 br label %loop.body3 loop.body3: - %myptr = phi i32* [ %ptr2, %loop.body5 ], [ %ptr, %loop.body2 ], [ undef, %loop.body1 ] - %bcmyptr = bitcast i32* %myptr to i32* - %val = load i32, i32* %bcmyptr, align 4 + %myptr = phi ptr [ %ptr2, %loop.body5 ], [ %ptr, %loop.body2 ], [ undef, %loop.body1 ] + %bcmyptr = bitcast ptr %myptr to ptr + %val = load i32, ptr %bcmyptr, align 4 %comp = icmp eq i32 %val, 48 br i1 %comp, label %loop.body4, label %loop.body5 @@ -347,11 +345,11 @@ loop.body4: br i1 undef, label %loop.header, label %loop.body5 loop.body5: - %ptr2 = load i32*, i32** undef, align 4 + %ptr2 = load ptr, ptr undef, align 4 br label %loop.body3 } -define void @unnatural_cfg2(i32* %p0, i32 %a0) { +define void @unnatural_cfg2(ptr %p0, i32 %a0) { ; Test that we can handle a loop with a nested natural loop *and* an unnatural ; loop. This was reduced from a crash on block placement when run over ; single-source GCC. 
@@ -371,32 +369,32 @@ entry: br label %loop.header loop.header: - %comp0 = icmp eq i32* %p0, null + %comp0 = icmp eq ptr %p0, null br i1 %comp0, label %bail, label %loop.body1 loop.body1: - %val0 = load i32*, i32** undef, align 4 + %val0 = load ptr, ptr undef, align 4 br i1 undef, label %loop.body2, label %loop.inner1.begin loop.body2: br i1 undef, label %loop.body4, label %loop.body3 loop.body3: - %ptr1 = getelementptr inbounds i32, i32* %val0, i32 0 - %castptr1 = bitcast i32* %ptr1 to i32** - %val1 = load i32*, i32** %castptr1, align 4 + %ptr1 = getelementptr inbounds i32, ptr %val0, i32 0 + %castptr1 = bitcast ptr %ptr1 to ptr + %val1 = load ptr, ptr %castptr1, align 4 br label %loop.inner1.begin loop.inner1.begin: - %valphi = phi i32* [ %val2, %loop.inner1.end ], [ %val1, %loop.body3 ], [ %val0, %loop.body1 ] - %castval = bitcast i32* %valphi to i32* + %valphi = phi ptr [ %val2, %loop.inner1.end ], [ %val1, %loop.body3 ], [ %val0, %loop.body1 ] + %castval = bitcast ptr %valphi to ptr %comp1 = icmp eq i32 %a0, 48 br i1 %comp1, label %loop.inner1.end, label %loop.body4 loop.inner1.end: - %ptr2 = getelementptr inbounds i32, i32* %valphi, i32 0 - %castptr2 = bitcast i32* %ptr2 to i32** - %val2 = load i32*, i32** %castptr2, align 4 + %ptr2 = getelementptr inbounds i32, ptr %valphi, i32 0 + %castptr2 = bitcast ptr %ptr2 to ptr + %val2 = load ptr, ptr %castptr2, align 4 br label %loop.inner1.begin loop.body4.dead: @@ -493,7 +491,7 @@ entry: br i1 %cond, label %entry.if.then_crit_edge, label %lor.lhs.false, !prof !1 entry.if.then_crit_edge: - %.pre14 = load i8, i8* undef, align 1 + %.pre14 = load i8, ptr undef, align 1 br label %if.then lor.lhs.false: @@ -506,7 +504,7 @@ exit: if.then: %0 = phi i8 [ %.pre14, %entry.if.then_crit_edge ], [ undef, %exit ] %1 = and i8 %0, 1 - store i8 %1, i8* undef, align 4 + store i8 %1, ptr undef, align 4 br label %if.end if.end: @@ -554,7 +552,7 @@ exit: declare i32 @__gxx_personality_v0(...) -define void @test_eh_lpad_successor() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +define void @test_eh_lpad_successor() personality ptr bitcast (i32 (...)* @__gxx_personality_v0 to ptr) { ; Some times the landing pad ends up as the first successor of an invoke block. ; When this happens, a strange result used to fall out of updateTerminators: we ; didn't correctly locate the fallthrough successor, assuming blindly that the @@ -572,9 +570,9 @@ preheader: br label %loop lpad: - %lpad.val = landingpad { i8*, i32 } + %lpad.val = landingpad { ptr, i32 } cleanup - resume { i8*, i32 } %lpad.val + resume { ptr, i32 } %lpad.val loop: br label %loop @@ -582,7 +580,7 @@ loop: declare void @fake_throw() noreturn -define void @test_eh_throw() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +define void @test_eh_throw() personality ptr bitcast (i32 (...)* @__gxx_personality_v0 to ptr) { ; For blocks containing a 'throw' (or similar functionality), we have ; a no-return invoke. In this case, only EH successors will exist, and ; fallthrough simply won't occur. 
Make sure we don't crash trying to update @@ -599,7 +597,7 @@ continue: unreachable cleanup: - %0 = landingpad { i8*, i32 } + %0 = landingpad { ptr, i32 } cleanup unreachable } @@ -622,24 +620,24 @@ body: br label %loop2a loop1: - %next.load = load i32*, i32** undef + %next.load = load ptr, ptr undef br i1 %comp.a, label %loop2a, label %loop2b loop2a: - %var = phi i32* [ null, %entry ], [ null, %body ], [ %next.phi, %loop1 ] - %next.var = phi i32* [ null, %entry ], [ undef, %body ], [ %next.load, %loop1 ] - %comp.a = icmp eq i32* %var, null + %var = phi ptr [ null, %entry ], [ null, %body ], [ %next.phi, %loop1 ] + %next.var = phi ptr [ null, %entry ], [ undef, %body ], [ %next.load, %loop1 ] + %comp.a = icmp eq ptr %var, null br label %loop3 loop2b: - %gep = getelementptr inbounds i32, i32* %var.phi, i32 0 - %next.ptr = bitcast i32* %gep to i32** - store i32* %next.phi, i32** %next.ptr + %gep = getelementptr inbounds i32, ptr %var.phi, i32 0 + %next.ptr = bitcast ptr %gep to ptr + store ptr %next.phi, ptr %next.ptr br label %loop3 loop3: - %var.phi = phi i32* [ %next.phi, %loop2b ], [ %var, %loop2a ] - %next.phi = phi i32* [ %next.load, %loop2b ], [ %next.var, %loop2a ] + %var.phi = phi ptr [ %next.phi, %loop2b ], [ %var, %loop2a ] + %next.phi = phi ptr [ %next.load, %loop2b ], [ %next.var, %loop2a ] br label %loop1 } @@ -737,199 +735,199 @@ define void @many_unanalyzable_branches() { entry: br label %0 - %val0 = load volatile float, float* undef + %val0 = load volatile float, ptr undef %cmp0 = fcmp une float %val0, 0.0 br i1 %cmp0, label %1, label %0 - %val1 = load volatile float, float* undef + %val1 = load volatile float, ptr undef %cmp1 = fcmp une float %val1, 0.0 br i1 %cmp1, label %2, label %1 - %val2 = load volatile float, float* undef + %val2 = load volatile float, ptr undef %cmp2 = fcmp une float %val2, 0.0 br i1 %cmp2, label %3, label %2 - %val3 = load volatile float, float* undef + %val3 = load volatile float, ptr undef %cmp3 = fcmp une float %val3, 0.0 br i1 %cmp3, label %4, label %3 - %val4 = load volatile float, float* undef + %val4 = load volatile float, ptr undef %cmp4 = fcmp une float %val4, 0.0 br i1 %cmp4, label %5, label %4 - %val5 = load volatile float, float* undef + %val5 = load volatile float, ptr undef %cmp5 = fcmp une float %val5, 0.0 br i1 %cmp5, label %6, label %5 - %val6 = load volatile float, float* undef + %val6 = load volatile float, ptr undef %cmp6 = fcmp une float %val6, 0.0 br i1 %cmp6, label %7, label %6 - %val7 = load volatile float, float* undef + %val7 = load volatile float, ptr undef %cmp7 = fcmp une float %val7, 0.0 br i1 %cmp7, label %8, label %7 - %val8 = load volatile float, float* undef + %val8 = load volatile float, ptr undef %cmp8 = fcmp une float %val8, 0.0 br i1 %cmp8, label %9, label %8 - %val9 = load volatile float, float* undef + %val9 = load volatile float, ptr undef %cmp9 = fcmp une float %val9, 0.0 br i1 %cmp9, label %10, label %9 - %val10 = load volatile float, float* undef + %val10 = load volatile float, ptr undef %cmp10 = fcmp une float %val10, 0.0 br i1 %cmp10, label %11, label %10 - %val11 = load volatile float, float* undef + %val11 = load volatile float, ptr undef %cmp11 = fcmp une float %val11, 0.0 br i1 %cmp11, label %12, label %11 - %val12 = load volatile float, float* undef + %val12 = load volatile float, ptr undef %cmp12 = fcmp une float %val12, 0.0 br i1 %cmp12, label %13, label %12 - %val13 = load volatile float, float* undef + %val13 = load volatile float, ptr undef %cmp13 = fcmp une float %val13, 0.0 br i1 %cmp13, 
label %14, label %13 - %val14 = load volatile float, float* undef + %val14 = load volatile float, ptr undef %cmp14 = fcmp une float %val14, 0.0 br i1 %cmp14, label %15, label %14 - %val15 = load volatile float, float* undef + %val15 = load volatile float, ptr undef %cmp15 = fcmp une float %val15, 0.0 br i1 %cmp15, label %16, label %15 - %val16 = load volatile float, float* undef + %val16 = load volatile float, ptr undef %cmp16 = fcmp une float %val16, 0.0 br i1 %cmp16, label %17, label %16 - %val17 = load volatile float, float* undef + %val17 = load volatile float, ptr undef %cmp17 = fcmp une float %val17, 0.0 br i1 %cmp17, label %18, label %17 - %val18 = load volatile float, float* undef + %val18 = load volatile float, ptr undef %cmp18 = fcmp une float %val18, 0.0 br i1 %cmp18, label %19, label %18 - %val19 = load volatile float, float* undef + %val19 = load volatile float, ptr undef %cmp19 = fcmp une float %val19, 0.0 br i1 %cmp19, label %20, label %19 - %val20 = load volatile float, float* undef + %val20 = load volatile float, ptr undef %cmp20 = fcmp une float %val20, 0.0 br i1 %cmp20, label %21, label %20 - %val21 = load volatile float, float* undef + %val21 = load volatile float, ptr undef %cmp21 = fcmp une float %val21, 0.0 br i1 %cmp21, label %22, label %21 - %val22 = load volatile float, float* undef + %val22 = load volatile float, ptr undef %cmp22 = fcmp une float %val22, 0.0 br i1 %cmp22, label %23, label %22 - %val23 = load volatile float, float* undef + %val23 = load volatile float, ptr undef %cmp23 = fcmp une float %val23, 0.0 br i1 %cmp23, label %24, label %23 - %val24 = load volatile float, float* undef + %val24 = load volatile float, ptr undef %cmp24 = fcmp une float %val24, 0.0 br i1 %cmp24, label %25, label %24 - %val25 = load volatile float, float* undef + %val25 = load volatile float, ptr undef %cmp25 = fcmp une float %val25, 0.0 br i1 %cmp25, label %26, label %25 - %val26 = load volatile float, float* undef + %val26 = load volatile float, ptr undef %cmp26 = fcmp une float %val26, 0.0 br i1 %cmp26, label %27, label %26 - %val27 = load volatile float, float* undef + %val27 = load volatile float, ptr undef %cmp27 = fcmp une float %val27, 0.0 br i1 %cmp27, label %28, label %27 - %val28 = load volatile float, float* undef + %val28 = load volatile float, ptr undef %cmp28 = fcmp une float %val28, 0.0 br i1 %cmp28, label %29, label %28 - %val29 = load volatile float, float* undef + %val29 = load volatile float, ptr undef %cmp29 = fcmp une float %val29, 0.0 br i1 %cmp29, label %30, label %29 - %val30 = load volatile float, float* undef + %val30 = load volatile float, ptr undef %cmp30 = fcmp une float %val30, 0.0 br i1 %cmp30, label %31, label %30 - %val31 = load volatile float, float* undef + %val31 = load volatile float, ptr undef %cmp31 = fcmp une float %val31, 0.0 br i1 %cmp31, label %32, label %31 - %val32 = load volatile float, float* undef + %val32 = load volatile float, ptr undef %cmp32 = fcmp une float %val32, 0.0 br i1 %cmp32, label %33, label %32 - %val33 = load volatile float, float* undef + %val33 = load volatile float, ptr undef %cmp33 = fcmp une float %val33, 0.0 br i1 %cmp33, label %34, label %33 - %val34 = load volatile float, float* undef + %val34 = load volatile float, ptr undef %cmp34 = fcmp une float %val34, 0.0 br i1 %cmp34, label %35, label %34 - %val35 = load volatile float, float* undef + %val35 = load volatile float, ptr undef %cmp35 = fcmp une float %val35, 0.0 br i1 %cmp35, label %36, label %35 - %val36 = load volatile float, float* undef + %val36 = 
load volatile float, ptr undef %cmp36 = fcmp une float %val36, 0.0 br i1 %cmp36, label %37, label %36 - %val37 = load volatile float, float* undef + %val37 = load volatile float, ptr undef %cmp37 = fcmp une float %val37, 0.0 br i1 %cmp37, label %38, label %37 - %val38 = load volatile float, float* undef + %val38 = load volatile float, ptr undef %cmp38 = fcmp une float %val38, 0.0 br i1 %cmp38, label %39, label %38 - %val39 = load volatile float, float* undef + %val39 = load volatile float, ptr undef %cmp39 = fcmp une float %val39, 0.0 br i1 %cmp39, label %40, label %39 - %val40 = load volatile float, float* undef + %val40 = load volatile float, ptr undef %cmp40 = fcmp une float %val40, 0.0 br i1 %cmp40, label %41, label %40 - %val41 = load volatile float, float* undef + %val41 = load volatile float, ptr undef %cmp41 = fcmp une float %val41, undef br i1 %cmp41, label %42, label %41 - %val42 = load volatile float, float* undef + %val42 = load volatile float, ptr undef %cmp42 = fcmp une float %val42, 0.0 br i1 %cmp42, label %43, label %42 - %val43 = load volatile float, float* undef + %val43 = load volatile float, ptr undef %cmp43 = fcmp une float %val43, 0.0 br i1 %cmp43, label %44, label %43 - %val44 = load volatile float, float* undef + %val44 = load volatile float, ptr undef %cmp44 = fcmp une float %val44, 0.0 br i1 %cmp44, label %45, label %44 - %val45 = load volatile float, float* undef + %val45 = load volatile float, ptr undef %cmp45 = fcmp une float %val45, 0.0 br i1 %cmp45, label %46, label %45 - %val46 = load volatile float, float* undef + %val46 = load volatile float, ptr undef %cmp46 = fcmp une float %val46, 0.0 br i1 %cmp46, label %47, label %46 - %val47 = load volatile float, float* undef + %val47 = load volatile float, ptr undef %cmp47 = fcmp une float %val47, 0.0 br i1 %cmp47, label %48, label %47 - %val48 = load volatile float, float* undef + %val48 = load volatile float, ptr undef %cmp48 = fcmp une float %val48, 0.0 br i1 %cmp48, label %49, label %48 - %val49 = load volatile float, float* undef + %val49 = load volatile float, ptr undef %cmp49 = fcmp une float %val49, 0.0 br i1 %cmp49, label %50, label %49 - %val50 = load volatile float, float* undef + %val50 = load volatile float, ptr undef %cmp50 = fcmp une float %val50, 0.0 br i1 %cmp50, label %51, label %50 - %val51 = load volatile float, float* undef + %val51 = load volatile float, ptr undef %cmp51 = fcmp une float %val51, 0.0 br i1 %cmp51, label %52, label %51 - %val52 = load volatile float, float* undef + %val52 = load volatile float, ptr undef %cmp52 = fcmp une float %val52, 0.0 br i1 %cmp52, label %53, label %52 - %val53 = load volatile float, float* undef + %val53 = load volatile float, ptr undef %cmp53 = fcmp une float %val53, 0.0 br i1 %cmp53, label %54, label %53 - %val54 = load volatile float, float* undef + %val54 = load volatile float, ptr undef %cmp54 = fcmp une float %val54, 0.0 br i1 %cmp54, label %55, label %54 - %val55 = load volatile float, float* undef + %val55 = load volatile float, ptr undef %cmp55 = fcmp une float %val55, 0.0 br i1 %cmp55, label %56, label %55 - %val56 = load volatile float, float* undef + %val56 = load volatile float, ptr undef %cmp56 = fcmp une float %val56, 0.0 br i1 %cmp56, label %57, label %56 - %val57 = load volatile float, float* undef + %val57 = load volatile float, ptr undef %cmp57 = fcmp une float %val57, 0.0 br i1 %cmp57, label %58, label %57 - %val58 = load volatile float, float* undef + %val58 = load volatile float, ptr undef %cmp58 = fcmp une float %val58, 0.0 br i1 
%cmp58, label %59, label %58 - %val59 = load volatile float, float* undef + %val59 = load volatile float, ptr undef %cmp59 = fcmp une float %val59, 0.0 br i1 %cmp59, label %60, label %59 - %val60 = load volatile float, float* undef + %val60 = load volatile float, ptr undef %cmp60 = fcmp une float %val60, 0.0 br i1 %cmp60, label %61, label %60 - %val61 = load volatile float, float* undef + %val61 = load volatile float, ptr undef %cmp61 = fcmp une float %val61, 0.0 br i1 %cmp61, label %62, label %61 - %val62 = load volatile float, float* undef + %val62 = load volatile float, ptr undef %cmp62 = fcmp une float %val62, 0.0 br i1 %cmp62, label %63, label %62 - %val63 = load volatile float, float* undef + %val63 = load volatile float, ptr undef %cmp63 = fcmp une float %val63, 0.0 br i1 %cmp63, label %64, label %63 - %val64 = load volatile float, float* undef + %val64 = load volatile float, ptr undef %cmp64 = fcmp une float %val64, 0.0 br i1 %cmp64, label %65, label %64 @@ -938,7 +936,7 @@ exit: ret void } -define void @benchmark_heapsort(i32 %n, double* nocapture %ra) { +define void @benchmark_heapsort(i32 %n, ptr nocapture %ra) { ; This test case comes from the heapsort benchmark, and exemplifies several ; important aspects to block placement in the presence of loops: ; 1) Loop rotation needs to *ensure* that the desired exiting edge can be @@ -974,7 +972,7 @@ define void @benchmark_heapsort(i32 %n, double* nocapture %ra) { entry: %shr = ashr i32 %n, 1 %add = add nsw i32 %shr, 1 - %arrayidx3 = getelementptr inbounds double, double* %ra, i64 1 + %arrayidx3 = getelementptr inbounds double, ptr %ra, i64 1 br label %for.cond for.cond: @@ -986,22 +984,22 @@ for.cond: if.then: %dec = add nsw i32 %l.0, -1 %idxprom = sext i32 %dec to i64 - %arrayidx = getelementptr inbounds double, double* %ra, i64 %idxprom - %0 = load double, double* %arrayidx, align 8 + %arrayidx = getelementptr inbounds double, ptr %ra, i64 %idxprom + %0 = load double, ptr %arrayidx, align 8 br label %if.end10 if.else: %idxprom1 = sext i32 %ir.0 to i64 - %arrayidx2 = getelementptr inbounds double, double* %ra, i64 %idxprom1 - %1 = load double, double* %arrayidx2, align 8 - %2 = load double, double* %arrayidx3, align 8 - store double %2, double* %arrayidx2, align 8 + %arrayidx2 = getelementptr inbounds double, ptr %ra, i64 %idxprom1 + %1 = load double, ptr %arrayidx2, align 8 + %2 = load double, ptr %arrayidx3, align 8 + store double %2, ptr %arrayidx2, align 8 %dec6 = add nsw i32 %ir.0, -1 %cmp7 = icmp eq i32 %dec6, 1 br i1 %cmp7, label %if.then8, label %if.end10 if.then8: - store double %1, double* %arrayidx3, align 8 + store double %1, ptr %arrayidx3, align 8 ret void if.end10: @@ -1027,12 +1025,12 @@ while.body: land.lhs.true: %idxprom13 = sext i32 %j.0 to i64 - %arrayidx14 = getelementptr inbounds double, double* %ra, i64 %idxprom13 - %3 = load double, double* %arrayidx14, align 8 + %arrayidx14 = getelementptr inbounds double, ptr %ra, i64 %idxprom13 + %3 = load double, ptr %arrayidx14, align 8 %add15 = add nsw i32 %j.0, 1 %idxprom16 = sext i32 %add15 to i64 - %arrayidx17 = getelementptr inbounds double, double* %ra, i64 %idxprom16 - %4 = load double, double* %arrayidx17, align 8 + %arrayidx17 = getelementptr inbounds double, ptr %ra, i64 %idxprom16 + %4 = load double, ptr %arrayidx17, align 8 %cmp18 = fcmp olt double %3, %4 br i1 %cmp18, label %if.then19, label %if.end20 @@ -1042,27 +1040,27 @@ if.then19: if.end20: %j.1 = phi i32 [ %add15, %if.then19 ], [ %j.0, %land.lhs.true ], [ %j.0, %while.body ] %idxprom21 = sext i32 %j.1 
to i64 - %arrayidx22 = getelementptr inbounds double, double* %ra, i64 %idxprom21 - %5 = load double, double* %arrayidx22, align 8 + %arrayidx22 = getelementptr inbounds double, ptr %ra, i64 %idxprom21 + %5 = load double, ptr %arrayidx22, align 8 %cmp23 = fcmp olt double %rra.0, %5 br i1 %cmp23, label %if.then24, label %while.cond if.then24: %idxprom27 = sext i32 %j.0.ph.in to i64 - %arrayidx28 = getelementptr inbounds double, double* %ra, i64 %idxprom27 - store double %5, double* %arrayidx28, align 8 + %arrayidx28 = getelementptr inbounds double, ptr %ra, i64 %idxprom27 + store double %5, ptr %arrayidx28, align 8 br label %while.cond.outer while.end: %idxprom33 = sext i32 %j.0.ph.in to i64 - %arrayidx34 = getelementptr inbounds double, double* %ra, i64 %idxprom33 - store double %rra.0, double* %arrayidx34, align 8 + %arrayidx34 = getelementptr inbounds double, ptr %ra, i64 %idxprom33 + store double %rra.0, ptr %arrayidx34, align 8 br label %for.cond } declare void @cold_function() cold -define i32 @test_cold_calls(i32* %a) { +define i32 @test_cold_calls(ptr %a) { ; Test that edges to blocks post-dominated by cold calls are ; marked as not expected to be taken. They should be laid out ; at the bottom. @@ -1073,8 +1071,8 @@ define i32 @test_cold_calls(i32* %a) { ; CHECK: %then entry: - %gep1 = getelementptr i32, i32* %a, i32 1 - %val1 = load i32, i32* %gep1 + %gep1 = getelementptr i32, ptr %a, i32 1 + %val1 = load i32, ptr %gep1 %cond1 = icmp ugt i32 %val1, 1 br i1 %cond1, label %then, label %else @@ -1083,8 +1081,8 @@ then: br label %exit else: - %gep2 = getelementptr i32, i32* %a, i32 2 - %val2 = load i32, i32* %gep2 + %gep2 = getelementptr i32, ptr %a, i32 2 + %val2 = load i32, ptr %gep2 br label %exit exit: @@ -1125,12 +1123,12 @@ then: ret i32 %3 hotlp: - %4 = landingpad { i8*, i32 } + %4 = landingpad { ptr, i32 } cleanup br label %lpret coldlp: - %5 = landingpad { i8*, i32 } + %5 = landingpad { ptr, i32 } cleanup br label %lpret @@ -1166,19 +1164,19 @@ exit: ret void innerlp: - %2 = landingpad { i8*, i32 } + %2 = landingpad { ptr, i32 } cleanup br label %innercleanup outerlp: - %3 = landingpad { i8*, i32 } + %3 = landingpad { ptr, i32 } cleanup br label %outercleanup outercleanup: - %4 = phi { i8*, i32 } [%2, %innercleanup], [%3, %outerlp] + %4 = phi { ptr, i32 } [%2, %innercleanup], [%3, %outerlp] call void @clean() - resume { i8*, i32 } %4 + resume { ptr, i32 } %4 innercleanup: call void @clean() @@ -1187,7 +1185,7 @@ innercleanup: declare void @hot_function() -define void @test_hot_branch(i32* %a) { +define void @test_hot_branch(ptr %a) { ; Test that a hot branch that has a probability a little larger than 80% will ; break CFG constrains when doing block placement. ; CHECK-LABEL: test_hot_branch: @@ -1197,8 +1195,8 @@ define void @test_hot_branch(i32* %a) { ; CHECK: %else entry: - %gep1 = getelementptr i32, i32* %a, i32 1 - %val1 = load i32, i32* %gep1 + %gep1 = getelementptr i32, ptr %a, i32 1 + %val1 = load i32, ptr %gep1 %cond1 = icmp ugt i32 %val1, 1 br i1 %cond1, label %then, label %else, !prof !5 @@ -1215,7 +1213,7 @@ exit: ret void } -define void @test_hot_branch_profile(i32* %a) !prof !6 { +define void @test_hot_branch_profile(ptr %a) !prof !6 { ; Test that a hot branch that has a probability a little larger than 50% will ; break CFG constrains when doing block placement when profile is available. 
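; Editorial note, not part of the original test: the difference from
; test_hot_branch above is only that real profile data is attached (a
; function-level !prof entry on the define plus branch_weights on the
; branch), which lowers the threshold for breaking the CFG layout constraint
; from roughly 80% to just over 50%. With hypothetical weights, the taken
; probability is computed the same way as before, e.g.
;   !{!"branch_weights", i32 51, i32 49}   ; 51 / (51 + 49) = 51% taken
; The actual !6 and !7 definitions live outside this excerpt.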
; CHECK-LABEL: test_hot_branch_profile: @@ -1225,8 +1223,8 @@ define void @test_hot_branch_profile(i32* %a) !prof !6 { ; CHECK: %else entry: - %gep1 = getelementptr i32, i32* %a, i32 1 - %val1 = load i32, i32* %gep1 + %gep1 = getelementptr i32, ptr %a, i32 1 + %val1 = load i32, ptr %gep1 %cond1 = icmp ugt i32 %val1, 1 br i1 %cond1, label %then, label %else, !prof !7 @@ -1243,7 +1241,7 @@ exit: ret void } -define void @test_hot_branch_triangle_profile(i32* %a) !prof !6 { +define void @test_hot_branch_triangle_profile(ptr %a) !prof !6 { ; Test that a hot branch that has a probability a little larger than 80% will ; break triangle shaped CFG constrains when doing block placement if profile ; is present. @@ -1253,8 +1251,8 @@ define void @test_hot_branch_triangle_profile(i32* %a) !prof !6 { ; CHECK: %then entry: - %gep1 = getelementptr i32, i32* %a, i32 1 - %val1 = load i32, i32* %gep1 + %gep1 = getelementptr i32, ptr %a, i32 1 + %val1 = load i32, ptr %gep1 %cond1 = icmp ugt i32 %val1, 1 br i1 %cond1, label %exit, label %then, !prof !5 @@ -1267,7 +1265,7 @@ exit: ret void } -define void @test_hot_branch_triangle_profile_topology(i32* %a) !prof !6 { +define void @test_hot_branch_triangle_profile_topology(ptr %a) !prof !6 { ; Test that a hot branch that has a probability between 50% and 66% will not ; break triangle shaped CFG constrains when doing block placement if profile ; is present. @@ -1277,8 +1275,8 @@ define void @test_hot_branch_triangle_profile_topology(i32* %a) !prof !6 { ; CHECK: %exit entry: - %gep1 = getelementptr i32, i32* %a, i32 1 - %val1 = load i32, i32* %gep1 + %gep1 = getelementptr i32, ptr %a, i32 1 + %val1 = load i32, ptr %gep1 %cond1 = icmp ugt i32 %val1, 1 br i1 %cond1, label %exit, label %then, !prof !7 @@ -1294,7 +1292,7 @@ exit: declare void @a() declare void @b() -define void @test_forked_hot_diamond(i32* %a) { +define void @test_forked_hot_diamond(ptr %a) { ; Test that a hot-branch with probability > 80% followed by a 50/50 branch ; will not place the cold predecessor if the probability for the fallthrough ; remains above 80% @@ -1306,22 +1304,22 @@ define void @test_forked_hot_diamond(i32* %a) { ; CHECK: %fork2 ; CHECK: %exit entry: - %gep1 = getelementptr i32, i32* %a, i32 1 - %val1 = load i32, i32* %gep1 + %gep1 = getelementptr i32, ptr %a, i32 1 + %val1 = load i32, ptr %gep1 %cond1 = icmp ugt i32 %val1, 1 br i1 %cond1, label %then, label %else, !prof !5 then: call void @hot_function() - %gep2 = getelementptr i32, i32* %a, i32 2 - %val2 = load i32, i32* %gep2 + %gep2 = getelementptr i32, ptr %a, i32 2 + %val2 = load i32, ptr %gep2 %cond2 = icmp ugt i32 %val2, 2 br i1 %cond2, label %fork1, label %fork2, !prof !8 else: call void @cold_function() - %gep3 = getelementptr i32, i32* %a, i32 3 - %val3 = load i32, i32* %gep3 + %gep3 = getelementptr i32, ptr %a, i32 3 + %val3 = load i32, ptr %gep3 %cond3 = icmp ugt i32 %val3, 3 br i1 %cond3, label %fork1, label %fork2, !prof !8 @@ -1338,7 +1336,7 @@ exit: ret void } -define void @test_forked_hot_diamond_gets_cold(i32* %a) { +define void @test_forked_hot_diamond_gets_cold(ptr %a) { ; Test that a hot-branch with probability > 80% followed by a 50/50 branch ; will place the cold predecessor if the probability for the fallthrough ; falls below 80% @@ -1359,15 +1357,15 @@ define void @test_forked_hot_diamond_gets_cold(i32* %a) { ; CHECK: %fork2 ; CHECK: %exit entry: - %gep1 = getelementptr i32, i32* %a, i32 1 - %val1 = load i32, i32* %gep1 + %gep1 = getelementptr i32, ptr %a, i32 1 + %val1 = load i32, ptr %gep1 %cond1 = icmp 
ugt i32 %val1, 1 br i1 %cond1, label %then1, label %else1, !prof !9 then1: call void @hot_function() - %gep2 = getelementptr i32, i32* %a, i32 2 - %val2 = load i32, i32* %gep2 + %gep2 = getelementptr i32, ptr %a, i32 2 + %val2 = load i32, ptr %gep2 %cond2 = icmp ugt i32 %val2, 2 br i1 %cond2, label %then2, label %else2, !prof !9 @@ -1377,8 +1375,8 @@ else1: then2: call void @hot_function() - %gep3 = getelementptr i32, i32* %a, i32 3 - %val3 = load i32, i32* %gep2 + %gep3 = getelementptr i32, ptr %a, i32 3 + %val3 = load i32, ptr %gep2 %cond3 = icmp ugt i32 %val2, 3 br i1 %cond3, label %fork1, label %fork2, !prof !8 @@ -1399,7 +1397,7 @@ exit: ret void } -define void @test_forked_hot_diamond_stays_hot(i32* %a) { +define void @test_forked_hot_diamond_stays_hot(ptr %a) { ; Test that a hot-branch with probability > 88.88% (1:8) followed by a 50/50 ; branch will not place the cold predecessor as the probability for the ; fallthrough stays above 80% @@ -1416,15 +1414,15 @@ define void @test_forked_hot_diamond_stays_hot(i32* %a) { ; CHECK: %fork2 ; CHECK: %exit entry: - %gep1 = getelementptr i32, i32* %a, i32 1 - %val1 = load i32, i32* %gep1 + %gep1 = getelementptr i32, ptr %a, i32 1 + %val1 = load i32, ptr %gep1 %cond1 = icmp ugt i32 %val1, 1 br i1 %cond1, label %then1, label %else1, !prof !10 then1: call void @hot_function() - %gep2 = getelementptr i32, i32* %a, i32 2 - %val2 = load i32, i32* %gep2 + %gep2 = getelementptr i32, ptr %a, i32 2 + %val2 = load i32, ptr %gep2 %cond2 = icmp ugt i32 %val2, 2 br i1 %cond2, label %then2, label %else2, !prof !10 @@ -1434,8 +1432,8 @@ else1: then2: call void @hot_function() - %gep3 = getelementptr i32, i32* %a, i32 3 - %val3 = load i32, i32* %gep2 + %gep3 = getelementptr i32, ptr %a, i32 3 + %val3 = load i32, ptr %gep2 %cond3 = icmp ugt i32 %val2, 3 br i1 %cond3, label %fork1, label %fork2, !prof !8 diff --git a/llvm/test/CodeGen/X86/lsr-loop-exit-cond.ll b/llvm/test/CodeGen/X86/lsr-loop-exit-cond.ll index e3c108d7a5843..8f7cbbfb08df0 100644 --- a/llvm/test/CodeGen/X86/lsr-loop-exit-cond.ll +++ b/llvm/test/CodeGen/X86/lsr-loop-exit-cond.ll @@ -1,14 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; ifndef INTEL_SYCL_OPAQUEPOINTER_READY ; RUN: llc < %s -mtriple=x86_64-darwin | FileCheck %s --check-prefix=GENERIC ; RUN: llc < %s -mtriple=x86_64-darwin -mcpu=atom | FileCheck %s --check-prefix=ATOM -; end -@Te0 = external global [256 x i32] ; <[256 x i32]*> [#uses=5] -@Te1 = external global [256 x i32] ; <[256 x i32]*> [#uses=4] -@Te3 = external global [256 x i32] ; <[256 x i32]*> [#uses=2] +@Te0 = external global [256 x i32] ; [#uses=5] +@Te1 = external global [256 x i32] ; [#uses=4] +@Te3 = external global [256 x i32] ; [#uses=2] -define void @t(i8* nocapture %in, i8* nocapture %out, i32* nocapture %rk, i32 %r) nounwind { +define void @t(ptr nocapture %in, ptr nocapture %out, ptr nocapture %rk, i32 %r) nounwind { ; GENERIC-LABEL: t: ; GENERIC: ## %bb.0: ## %entry ; GENERIC-NEXT: pushq %rbp @@ -177,9 +175,9 @@ define void @t(i8* nocapture %in, i8* nocapture %out, i32* nocapture %rk, i32 %r ; ATOM-NEXT: popq %rbp ; ATOM-NEXT: retq entry: - %0 = load i32, i32* %rk, align 4 ; [#uses=1] - %1 = getelementptr i32, i32* %rk, i64 1 ; [#uses=1] - %2 = load i32, i32* %1, align 4 ; [#uses=1] + %0 = load i32, ptr %rk, align 4 ; [#uses=1] + %1 = getelementptr i32, ptr %rk, i64 1 ; [#uses=1] + %2 = load i32, ptr %1, align 4 ; [#uses=1] %tmp15 = add i32 %r, -1 ; [#uses=1] %tmp.16 = zext i32 %tmp15 to i64 ; [#uses=2] br label %bb @@ 
-189,67 +187,67 @@ bb: ; preds = %bb1, %entry %s1.0 = phi i32 [ %2, %entry ], [ %56, %bb1 ] ; [#uses=2] %s0.0 = phi i32 [ %0, %entry ], [ %43, %bb1 ] ; [#uses=2] %tmp18 = shl i64 %indvar, 4 ; [#uses=4] - %rk26 = bitcast i32* %rk to i8* ; [#uses=6] + %rk26 = bitcast ptr %rk to ptr ; [#uses=6] %3 = lshr i32 %s0.0, 24 ; [#uses=1] %4 = zext i32 %3 to i64 ; [#uses=1] - %5 = getelementptr [256 x i32], [256 x i32]* @Te0, i64 0, i64 %4 ; [#uses=1] - %6 = load i32, i32* %5, align 4 ; [#uses=1] + %5 = getelementptr [256 x i32], ptr @Te0, i64 0, i64 %4 ; [#uses=1] + %6 = load i32, ptr %5, align 4 ; [#uses=1] %7 = lshr i32 %s1.0, 16 ; [#uses=1] %8 = and i32 %7, 255 ; [#uses=1] %9 = zext i32 %8 to i64 ; [#uses=1] - %10 = getelementptr [256 x i32], [256 x i32]* @Te1, i64 0, i64 %9 ; [#uses=1] - %11 = load i32, i32* %10, align 4 ; [#uses=1] + %10 = getelementptr [256 x i32], ptr @Te1, i64 0, i64 %9 ; [#uses=1] + %11 = load i32, ptr %10, align 4 ; [#uses=1] %ctg2.sum2728 = or i64 %tmp18, 8 ; [#uses=1] - %12 = getelementptr i8, i8* %rk26, i64 %ctg2.sum2728 ; [#uses=1] - %13 = bitcast i8* %12 to i32* ; [#uses=1] - %14 = load i32, i32* %13, align 4 ; [#uses=1] + %12 = getelementptr i8, ptr %rk26, i64 %ctg2.sum2728 ; [#uses=1] + %13 = bitcast ptr %12 to ptr ; [#uses=1] + %14 = load i32, ptr %13, align 4 ; [#uses=1] %15 = xor i32 %11, %6 ; [#uses=1] %16 = xor i32 %15, %14 ; [#uses=3] %17 = lshr i32 %s1.0, 24 ; [#uses=1] %18 = zext i32 %17 to i64 ; [#uses=1] - %19 = getelementptr [256 x i32], [256 x i32]* @Te0, i64 0, i64 %18 ; [#uses=1] - %20 = load i32, i32* %19, align 4 ; [#uses=1] + %19 = getelementptr [256 x i32], ptr @Te0, i64 0, i64 %18 ; [#uses=1] + %20 = load i32, ptr %19, align 4 ; [#uses=1] %21 = and i32 %s0.0, 255 ; [#uses=1] %22 = zext i32 %21 to i64 ; [#uses=1] - %23 = getelementptr [256 x i32], [256 x i32]* @Te3, i64 0, i64 %22 ; [#uses=1] - %24 = load i32, i32* %23, align 4 ; [#uses=1] + %23 = getelementptr [256 x i32], ptr @Te3, i64 0, i64 %22 ; [#uses=1] + %24 = load i32, ptr %23, align 4 ; [#uses=1] %ctg2.sum2930 = or i64 %tmp18, 12 ; [#uses=1] - %25 = getelementptr i8, i8* %rk26, i64 %ctg2.sum2930 ; [#uses=1] - %26 = bitcast i8* %25 to i32* ; [#uses=1] - %27 = load i32, i32* %26, align 4 ; [#uses=1] + %25 = getelementptr i8, ptr %rk26, i64 %ctg2.sum2930 ; [#uses=1] + %26 = bitcast ptr %25 to ptr ; [#uses=1] + %27 = load i32, ptr %26, align 4 ; [#uses=1] %28 = xor i32 %24, %20 ; [#uses=1] %29 = xor i32 %28, %27 ; [#uses=4] %30 = lshr i32 %16, 24 ; [#uses=1] %31 = zext i32 %30 to i64 ; [#uses=1] - %32 = getelementptr [256 x i32], [256 x i32]* @Te0, i64 0, i64 %31 ; [#uses=1] - %33 = load i32, i32* %32, align 4 ; [#uses=2] + %32 = getelementptr [256 x i32], ptr @Te0, i64 0, i64 %31 ; [#uses=1] + %33 = load i32, ptr %32, align 4 ; [#uses=2] %exitcond = icmp eq i64 %indvar, %tmp.16 ; [#uses=1] br i1 %exitcond, label %bb2, label %bb1 bb1: ; preds = %bb %ctg2.sum31 = add i64 %tmp18, 16 ; [#uses=1] - %34 = getelementptr i8, i8* %rk26, i64 %ctg2.sum31 ; [#uses=1] - %35 = bitcast i8* %34 to i32* ; [#uses=1] + %34 = getelementptr i8, ptr %rk26, i64 %ctg2.sum31 ; [#uses=1] + %35 = bitcast ptr %34 to ptr ; [#uses=1] %36 = lshr i32 %29, 16 ; [#uses=1] %37 = and i32 %36, 255 ; [#uses=1] %38 = zext i32 %37 to i64 ; [#uses=1] - %39 = getelementptr [256 x i32], [256 x i32]* @Te1, i64 0, i64 %38 ; [#uses=1] - %40 = load i32, i32* %39, align 4 ; [#uses=1] - %41 = load i32, i32* %35, align 4 ; [#uses=1] + %39 = getelementptr [256 x i32], ptr @Te1, i64 0, i64 %38 ; [#uses=1] + %40 = load i32, ptr %39, align 4 ; 
[#uses=1] + %41 = load i32, ptr %35, align 4 ; [#uses=1] %42 = xor i32 %40, %33 ; [#uses=1] %43 = xor i32 %42, %41 ; [#uses=1] %44 = lshr i32 %29, 24 ; [#uses=1] %45 = zext i32 %44 to i64 ; [#uses=1] - %46 = getelementptr [256 x i32], [256 x i32]* @Te0, i64 0, i64 %45 ; [#uses=1] - %47 = load i32, i32* %46, align 4 ; [#uses=1] + %46 = getelementptr [256 x i32], ptr @Te0, i64 0, i64 %45 ; [#uses=1] + %47 = load i32, ptr %46, align 4 ; [#uses=1] %48 = and i32 %16, 255 ; [#uses=1] %49 = zext i32 %48 to i64 ; [#uses=1] - %50 = getelementptr [256 x i32], [256 x i32]* @Te3, i64 0, i64 %49 ; [#uses=1] - %51 = load i32, i32* %50, align 4 ; [#uses=1] + %50 = getelementptr [256 x i32], ptr @Te3, i64 0, i64 %49 ; [#uses=1] + %51 = load i32, ptr %50, align 4 ; [#uses=1] %ctg2.sum32 = add i64 %tmp18, 20 ; [#uses=1] - %52 = getelementptr i8, i8* %rk26, i64 %ctg2.sum32 ; [#uses=1] - %53 = bitcast i8* %52 to i32* ; [#uses=1] - %54 = load i32, i32* %53, align 4 ; [#uses=1] + %52 = getelementptr i8, ptr %rk26, i64 %ctg2.sum32 ; [#uses=1] + %53 = bitcast ptr %52 to ptr ; [#uses=1] + %54 = load i32, ptr %53, align 4 ; [#uses=1] %55 = xor i32 %51, %47 ; [#uses=1] %56 = xor i32 %55, %54 ; [#uses=1] %indvar.next = add i64 %indvar, 1 ; [#uses=1] @@ -258,49 +256,49 @@ bb1: ; preds = %bb bb2: ; preds = %bb %tmp10 = shl i64 %tmp.16, 4 ; [#uses=2] %ctg2.sum = add i64 %tmp10, 16 ; [#uses=1] - %tmp1213 = getelementptr i8, i8* %rk26, i64 %ctg2.sum ; [#uses=1] - %57 = bitcast i8* %tmp1213 to i32* ; [#uses=1] + %tmp1213 = getelementptr i8, ptr %rk26, i64 %ctg2.sum ; [#uses=1] + %57 = bitcast ptr %tmp1213 to ptr ; [#uses=1] %58 = and i32 %33, -16777216 ; [#uses=1] %59 = lshr i32 %29, 16 ; [#uses=1] %60 = and i32 %59, 255 ; [#uses=1] %61 = zext i32 %60 to i64 ; [#uses=1] - %62 = getelementptr [256 x i32], [256 x i32]* @Te1, i64 0, i64 %61 ; [#uses=1] - %63 = load i32, i32* %62, align 4 ; [#uses=1] + %62 = getelementptr [256 x i32], ptr @Te1, i64 0, i64 %61 ; [#uses=1] + %63 = load i32, ptr %62, align 4 ; [#uses=1] %64 = and i32 %63, 16711680 ; [#uses=1] %65 = or i32 %64, %58 ; [#uses=1] - %66 = load i32, i32* %57, align 4 ; [#uses=1] + %66 = load i32, ptr %57, align 4 ; [#uses=1] %67 = xor i32 %65, %66 ; [#uses=2] %68 = lshr i32 %29, 8 ; [#uses=1] %69 = zext i32 %68 to i64 ; [#uses=1] - %70 = getelementptr [256 x i32], [256 x i32]* @Te0, i64 0, i64 %69 ; [#uses=1] - %71 = load i32, i32* %70, align 4 ; [#uses=1] + %70 = getelementptr [256 x i32], ptr @Te0, i64 0, i64 %69 ; [#uses=1] + %71 = load i32, ptr %70, align 4 ; [#uses=1] %72 = and i32 %71, -16777216 ; [#uses=1] %73 = and i32 %16, 255 ; [#uses=1] %74 = zext i32 %73 to i64 ; [#uses=1] - %75 = getelementptr [256 x i32], [256 x i32]* @Te1, i64 0, i64 %74 ; [#uses=1] - %76 = load i32, i32* %75, align 4 ; [#uses=1] + %75 = getelementptr [256 x i32], ptr @Te1, i64 0, i64 %74 ; [#uses=1] + %76 = load i32, ptr %75, align 4 ; [#uses=1] %77 = and i32 %76, 16711680 ; [#uses=1] %78 = or i32 %77, %72 ; [#uses=1] %ctg2.sum25 = add i64 %tmp10, 20 ; [#uses=1] - %79 = getelementptr i8, i8* %rk26, i64 %ctg2.sum25 ; [#uses=1] - %80 = bitcast i8* %79 to i32* ; [#uses=1] - %81 = load i32, i32* %80, align 4 ; [#uses=1] + %79 = getelementptr i8, ptr %rk26, i64 %ctg2.sum25 ; [#uses=1] + %80 = bitcast ptr %79 to ptr ; [#uses=1] + %81 = load i32, ptr %80, align 4 ; [#uses=1] %82 = xor i32 %78, %81 ; [#uses=2] %83 = lshr i32 %67, 24 ; [#uses=1] %84 = trunc i32 %83 to i8 ; [#uses=1] - store i8 %84, i8* %out, align 1 + store i8 %84, ptr %out, align 1 %85 = lshr i32 %67, 16 ; [#uses=1] %86 = trunc 
i32 %85 to i8 ; [#uses=1] - %87 = getelementptr i8, i8* %out, i64 1 ; [#uses=1] - store i8 %86, i8* %87, align 1 - %88 = getelementptr i8, i8* %out, i64 4 ; [#uses=1] + %87 = getelementptr i8, ptr %out, i64 1 ; [#uses=1] + store i8 %86, ptr %87, align 1 + %88 = getelementptr i8, ptr %out, i64 4 ; [#uses=1] %89 = lshr i32 %82, 24 ; [#uses=1] %90 = trunc i32 %89 to i8 ; [#uses=1] - store i8 %90, i8* %88, align 1 + store i8 %90, ptr %88, align 1 %91 = lshr i32 %82, 16 ; [#uses=1] %92 = trunc i32 %91 to i8 ; [#uses=1] - %93 = getelementptr i8, i8* %out, i64 5 ; [#uses=1] - store i8 %92, i8* %93, align 1 + %93 = getelementptr i8, ptr %out, i64 5 ; [#uses=1] + store i8 %92, ptr %93, align 1 ret void } @@ -308,7 +306,7 @@ bb2: ; preds = %bb ; is equal to the stride. ; It must not fold (cmp (add iv, 1), 1) --> (cmp iv, 0). -define i32 @f(i32 %i, i32* nocapture %a) nounwind uwtable readonly ssp { +define i32 @f(i32 %i, ptr nocapture %a) nounwind uwtable readonly ssp { ; GENERIC-LABEL: f: ; GENERIC: ## %bb.0: ## %entry ; GENERIC-NEXT: xorl %eax, %eax @@ -370,8 +368,8 @@ for.body: ; preds = %for.body.lr.ph, %fo %indvars.iv = phi i64 [ %0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ] %bi.06 = phi i32 [ 0, %for.body.lr.ph ], [ %i.addr.0.bi.0, %for.body ] %b.05 = phi i32 [ 0, %for.body.lr.ph ], [ %.b.0, %for.body ] - %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv - %1 = load i32, i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv + %1 = load i32, ptr %arrayidx, align 4 %cmp1 = icmp ugt i32 %1, %b.05 %.b.0 = select i1 %cmp1, i32 %1, i32 %b.05 %2 = trunc i64 %indvars.iv to i32 diff --git a/llvm/test/CodeGen/X86/min-legal-vector-width.ll b/llvm/test/CodeGen/X86/min-legal-vector-width.ll index 801e395a86d78..177ae2aa40578 100644 --- a/llvm/test/CodeGen/X86/min-legal-vector-width.ll +++ b/llvm/test/CodeGen/X86/min-legal-vector-width.ll @@ -1,5 +1,4 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; ifndef INTEL_SYCL_OPAQUEPOINTER_READY ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skylake-avx512 -mattr=prefer-256-bit | FileCheck %s --check-prefixes=CHECK,CHECK-SKX ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skylake-avx512 -mattr=prefer-256-bit,avx512vbmi | FileCheck %s --check-prefixes=CHECK,CHECK-SKX,CHECK-SKX-VBMI ; Make sure CPUs default to prefer-256-bit. avx512vnni isn't interesting as it just adds an isel peephole for vpmaddwd+vpaddd @@ -10,11 +9,10 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=-avx512vnni -mcpu=icelake-client | FileCheck %s --check-prefixes=CHECK,CHECK-VBMI ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=-avx512vnni -mcpu=icelake-server | FileCheck %s --check-prefixes=CHECK,CHECK-VBMI ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=-avx512vnni -mcpu=tigerlake | FileCheck %s --check-prefixes=CHECK,CHECK-VBMI -; end ; This file primarily contains tests for specific places in X86ISelLowering.cpp that needed be made aware of the legalizer not allowing 512-bit vectors due to prefer-256-bit even though AVX512 is enabled. 
-define dso_local void @add256(<16 x i32>* %a, <16 x i32>* %b, <16 x i32>* %c) "min-legal-vector-width"="256" { +define dso_local void @add256(ptr %a, ptr %b, ptr %c) "min-legal-vector-width"="256" { ; CHECK-LABEL: add256: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovdqa (%rdi), %ymm0 @@ -25,14 +23,14 @@ define dso_local void @add256(<16 x i32>* %a, <16 x i32>* %b, <16 x i32>* %c) "m ; CHECK-NEXT: vmovdqa %ymm1, 32(%rdx) ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq - %d = load <16 x i32>, <16 x i32>* %a - %e = load <16 x i32>, <16 x i32>* %b + %d = load <16 x i32>, ptr %a + %e = load <16 x i32>, ptr %b %f = add <16 x i32> %d, %e - store <16 x i32> %f, <16 x i32>* %c + store <16 x i32> %f, ptr %c ret void } -define dso_local void @add512(<16 x i32>* %a, <16 x i32>* %b, <16 x i32>* %c) "min-legal-vector-width"="512" { +define dso_local void @add512(ptr %a, ptr %b, ptr %c) "min-legal-vector-width"="512" { ; CHECK-LABEL: add512: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovdqa64 (%rdi), %zmm0 @@ -40,14 +38,14 @@ define dso_local void @add512(<16 x i32>* %a, <16 x i32>* %b, <16 x i32>* %c) "m ; CHECK-NEXT: vmovdqa64 %zmm0, (%rdx) ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq - %d = load <16 x i32>, <16 x i32>* %a - %e = load <16 x i32>, <16 x i32>* %b + %d = load <16 x i32>, ptr %a + %e = load <16 x i32>, ptr %b %f = add <16 x i32> %d, %e - store <16 x i32> %f, <16 x i32>* %c + store <16 x i32> %f, ptr %c ret void } -define dso_local void @avg_v64i8_256(<64 x i8>* %a, <64 x i8>* %b) "min-legal-vector-width"="256" { +define dso_local void @avg_v64i8_256(ptr %a, ptr %b) "min-legal-vector-width"="256" { ; CHECK-LABEL: avg_v64i8_256: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovdqa (%rsi), %ymm0 @@ -58,20 +56,20 @@ define dso_local void @avg_v64i8_256(<64 x i8>* %a, <64 x i8>* %b) "min-legal-ve ; CHECK-NEXT: vmovdqu %ymm0, (%rax) ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq - %1 = load <64 x i8>, <64 x i8>* %a - %2 = load <64 x i8>, <64 x i8>* %b + %1 = load <64 x i8>, ptr %a + %2 = load <64 x i8>, ptr %b %3 = zext <64 x i8> %1 to <64 x i32> %4 = zext <64 x i8> %2 to <64 x i32> %5 = add nuw nsw <64 x i32> %3, %6 = add nuw nsw <64 x i32> %5, %4 %7 = lshr <64 x i32> %6, %8 = trunc <64 x i32> %7 to <64 x i8> - store <64 x i8> %8, <64 x i8>* undef, align 4 + store <64 x i8> %8, ptr undef, align 4 ret void } -define dso_local void @avg_v64i8_512(<64 x i8>* %a, <64 x i8>* %b) "min-legal-vector-width"="512" { +define dso_local void @avg_v64i8_512(ptr %a, ptr %b) "min-legal-vector-width"="512" { ; CHECK-LABEL: avg_v64i8_512: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovdqa64 (%rdi), %zmm0 @@ -79,19 +77,19 @@ define dso_local void @avg_v64i8_512(<64 x i8>* %a, <64 x i8>* %b) "min-legal-ve ; CHECK-NEXT: vmovdqu64 %zmm0, (%rax) ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq - %1 = load <64 x i8>, <64 x i8>* %a - %2 = load <64 x i8>, <64 x i8>* %b + %1 = load <64 x i8>, ptr %a + %2 = load <64 x i8>, ptr %b %3 = zext <64 x i8> %1 to <64 x i32> %4 = zext <64 x i8> %2 to <64 x i32> %5 = add nuw nsw <64 x i32> %3, %6 = add nuw nsw <64 x i32> %5, %4 %7 = lshr <64 x i32> %6, %8 = trunc <64 x i32> %7 to <64 x i8> - store <64 x i8> %8, <64 x i8>* undef, align 4 + store <64 x i8> %8, ptr undef, align 4 ret void } -define dso_local void @pmaddwd_32_256(<32 x i16>* %APtr, <32 x i16>* %BPtr, <16 x i32>* %CPtr) "min-legal-vector-width"="256" { +define dso_local void @pmaddwd_32_256(ptr %APtr, ptr %BPtr, ptr %CPtr) "min-legal-vector-width"="256" { ; CHECK-LABEL: pmaddwd_32_256: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovdqa (%rdi), %ymm0 @@ -102,19 +100,19 @@ define 
dso_local void @pmaddwd_32_256(<32 x i16>* %APtr, <32 x i16>* %BPtr, <16 ; CHECK-NEXT: vmovdqa %ymm1, 32(%rdx) ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq - %A = load <32 x i16>, <32 x i16>* %APtr - %B = load <32 x i16>, <32 x i16>* %BPtr + %A = load <32 x i16>, ptr %APtr + %B = load <32 x i16>, ptr %BPtr %a = sext <32 x i16> %A to <32 x i32> %b = sext <32 x i16> %B to <32 x i32> %m = mul nsw <32 x i32> %a, %b %odd = shufflevector <32 x i32> %m, <32 x i32> undef, <16 x i32> %even = shufflevector <32 x i32> %m, <32 x i32> undef, <16 x i32> %ret = add <16 x i32> %odd, %even - store <16 x i32> %ret, <16 x i32>* %CPtr + store <16 x i32> %ret, ptr %CPtr ret void } -define dso_local void @pmaddwd_32_512(<32 x i16>* %APtr, <32 x i16>* %BPtr, <16 x i32>* %CPtr) "min-legal-vector-width"="512" { +define dso_local void @pmaddwd_32_512(ptr %APtr, ptr %BPtr, ptr %CPtr) "min-legal-vector-width"="512" { ; CHECK-LABEL: pmaddwd_32_512: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovdqa64 (%rdi), %zmm0 @@ -122,19 +120,19 @@ define dso_local void @pmaddwd_32_512(<32 x i16>* %APtr, <32 x i16>* %BPtr, <16 ; CHECK-NEXT: vmovdqa64 %zmm0, (%rdx) ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq - %A = load <32 x i16>, <32 x i16>* %APtr - %B = load <32 x i16>, <32 x i16>* %BPtr + %A = load <32 x i16>, ptr %APtr + %B = load <32 x i16>, ptr %BPtr %a = sext <32 x i16> %A to <32 x i32> %b = sext <32 x i16> %B to <32 x i32> %m = mul nsw <32 x i32> %a, %b %odd = shufflevector <32 x i32> %m, <32 x i32> undef, <16 x i32> %even = shufflevector <32 x i32> %m, <32 x i32> undef, <16 x i32> %ret = add <16 x i32> %odd, %even - store <16 x i32> %ret, <16 x i32>* %CPtr + store <16 x i32> %ret, ptr %CPtr ret void } -define dso_local void @psubus_64i8_max_256(<64 x i8>* %xptr, <64 x i8>* %yptr, <64 x i8>* %zptr) "min-legal-vector-width"="256" { +define dso_local void @psubus_64i8_max_256(ptr %xptr, ptr %yptr, ptr %zptr) "min-legal-vector-width"="256" { ; CHECK-LABEL: psubus_64i8_max_256: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovdqa (%rdi), %ymm0 @@ -145,16 +143,16 @@ define dso_local void @psubus_64i8_max_256(<64 x i8>* %xptr, <64 x i8>* %yptr, < ; CHECK-NEXT: vmovdqa %ymm1, 32(%rdx) ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq - %x = load <64 x i8>, <64 x i8>* %xptr - %y = load <64 x i8>, <64 x i8>* %yptr + %x = load <64 x i8>, ptr %xptr + %y = load <64 x i8>, ptr %yptr %cmp = icmp ult <64 x i8> %x, %y %max = select <64 x i1> %cmp, <64 x i8> %y, <64 x i8> %x %res = sub <64 x i8> %max, %y - store <64 x i8> %res, <64 x i8>* %zptr + store <64 x i8> %res, ptr %zptr ret void } -define dso_local void @psubus_64i8_max_512(<64 x i8>* %xptr, <64 x i8>* %yptr, <64 x i8>* %zptr) "min-legal-vector-width"="512" { +define dso_local void @psubus_64i8_max_512(ptr %xptr, ptr %yptr, ptr %zptr) "min-legal-vector-width"="512" { ; CHECK-LABEL: psubus_64i8_max_512: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovdqa64 (%rdi), %zmm0 @@ -162,16 +160,16 @@ define dso_local void @psubus_64i8_max_512(<64 x i8>* %xptr, <64 x i8>* %yptr, < ; CHECK-NEXT: vmovdqa64 %zmm0, (%rdx) ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq - %x = load <64 x i8>, <64 x i8>* %xptr - %y = load <64 x i8>, <64 x i8>* %yptr + %x = load <64 x i8>, ptr %xptr + %y = load <64 x i8>, ptr %yptr %cmp = icmp ult <64 x i8> %x, %y %max = select <64 x i1> %cmp, <64 x i8> %y, <64 x i8> %x %res = sub <64 x i8> %max, %y - store <64 x i8> %res, <64 x i8>* %zptr + store <64 x i8> %res, ptr %zptr ret void } -define dso_local i32 @_Z9test_charPcS_i_256(i8* nocapture readonly, i8* nocapture readonly, i32) "min-legal-vector-width"="256" { 
+define dso_local i32 @_Z9test_charPcS_i_256(ptr nocapture readonly, ptr nocapture readonly, i32) "min-legal-vector-width"="256" { ; CHECK-SKX-LABEL: _Z9test_charPcS_i_256: ; CHECK-SKX: # %bb.0: # %entry ; CHECK-SKX-NEXT: movl %edx, %eax @@ -283,13 +281,13 @@ entry: vector.body: %index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ] %vec.phi = phi <32 x i32> [ %11, %vector.body ], [ zeroinitializer, %entry ] - %4 = getelementptr inbounds i8, i8* %0, i64 %index - %5 = bitcast i8* %4 to <32 x i8>* - %wide.load = load <32 x i8>, <32 x i8>* %5, align 1 + %4 = getelementptr inbounds i8, ptr %0, i64 %index + %5 = bitcast ptr %4 to ptr + %wide.load = load <32 x i8>, ptr %5, align 1 %6 = sext <32 x i8> %wide.load to <32 x i32> - %7 = getelementptr inbounds i8, i8* %1, i64 %index - %8 = bitcast i8* %7 to <32 x i8>* - %wide.load14 = load <32 x i8>, <32 x i8>* %8, align 1 + %7 = getelementptr inbounds i8, ptr %1, i64 %index + %8 = bitcast ptr %7 to ptr + %wide.load14 = load <32 x i8>, ptr %8, align 1 %9 = sext <32 x i8> %wide.load14 to <32 x i32> %10 = mul nsw <32 x i32> %9, %6 %11 = add nsw <32 x i32> %10, %vec.phi @@ -312,7 +310,7 @@ middle.block: ret i32 %13 } -define dso_local i32 @_Z9test_charPcS_i_512(i8* nocapture readonly, i8* nocapture readonly, i32) "min-legal-vector-width"="512" { +define dso_local i32 @_Z9test_charPcS_i_512(ptr nocapture readonly, ptr nocapture readonly, i32) "min-legal-vector-width"="512" { ; CHECK-SKX-LABEL: _Z9test_charPcS_i_512: ; CHECK-SKX: # %bb.0: # %entry ; CHECK-SKX-NEXT: movl %edx, %eax @@ -409,13 +407,13 @@ entry: vector.body: %index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ] %vec.phi = phi <32 x i32> [ %11, %vector.body ], [ zeroinitializer, %entry ] - %4 = getelementptr inbounds i8, i8* %0, i64 %index - %5 = bitcast i8* %4 to <32 x i8>* - %wide.load = load <32 x i8>, <32 x i8>* %5, align 1 + %4 = getelementptr inbounds i8, ptr %0, i64 %index + %5 = bitcast ptr %4 to ptr + %wide.load = load <32 x i8>, ptr %5, align 1 %6 = sext <32 x i8> %wide.load to <32 x i32> - %7 = getelementptr inbounds i8, i8* %1, i64 %index - %8 = bitcast i8* %7 to <32 x i8>* - %wide.load14 = load <32 x i8>, <32 x i8>* %8, align 1 + %7 = getelementptr inbounds i8, ptr %1, i64 %index + %8 = bitcast ptr %7 to ptr + %wide.load14 = load <32 x i8>, ptr %8, align 1 %9 = sext <32 x i8> %wide.load14 to <32 x i32> %10 = mul nsw <32 x i32> %9, %6 %11 = add nsw <32 x i32> %10, %vec.phi @@ -522,13 +520,13 @@ entry: vector.body: %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] %vec.phi = phi <16 x i32> [ zeroinitializer, %entry ], [ %10, %vector.body ] - %0 = getelementptr inbounds [1024 x i8], [1024 x i8]* @a, i64 0, i64 %index - %1 = bitcast i8* %0 to <16 x i8>* - %wide.load = load <16 x i8>, <16 x i8>* %1, align 4 + %0 = getelementptr inbounds [1024 x i8], ptr @a, i64 0, i64 %index + %1 = bitcast ptr %0 to ptr + %wide.load = load <16 x i8>, ptr %1, align 4 %2 = zext <16 x i8> %wide.load to <16 x i32> - %3 = getelementptr inbounds [1024 x i8], [1024 x i8]* @b, i64 0, i64 %index - %4 = bitcast i8* %3 to <16 x i8>* - %wide.load1 = load <16 x i8>, <16 x i8>* %4, align 4 + %3 = getelementptr inbounds [1024 x i8], ptr @b, i64 0, i64 %index + %4 = bitcast ptr %3 to ptr + %wide.load1 = load <16 x i8>, ptr %4, align 4 %5 = zext <16 x i8> %wide.load1 to <16 x i32> %6 = sub nsw <16 x i32> %2, %5 %7 = icmp sgt <16 x i32> %6, @@ -633,13 +631,13 @@ entry: vector.body: %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] %vec.phi = phi <16 x i32> [ zeroinitializer, %entry 
], [ %10, %vector.body ] - %0 = getelementptr inbounds [1024 x i8], [1024 x i8]* @a, i64 0, i64 %index - %1 = bitcast i8* %0 to <16 x i8>* - %wide.load = load <16 x i8>, <16 x i8>* %1, align 4 + %0 = getelementptr inbounds [1024 x i8], ptr @a, i64 0, i64 %index + %1 = bitcast ptr %0 to ptr + %wide.load = load <16 x i8>, ptr %1, align 4 %2 = zext <16 x i8> %wide.load to <16 x i32> - %3 = getelementptr inbounds [1024 x i8], [1024 x i8]* @b, i64 0, i64 %index - %4 = bitcast i8* %3 to <16 x i8>* - %wide.load1 = load <16 x i8>, <16 x i8>* %4, align 4 + %3 = getelementptr inbounds [1024 x i8], ptr @b, i64 0, i64 %index + %4 = bitcast ptr %3 to ptr + %wide.load1 = load <16 x i8>, ptr %4, align 4 %5 = zext <16 x i8> %wide.load1 to <16 x i32> %6 = sub nsw <16 x i32> %2, %5 %7 = icmp sgt <16 x i32> %6, @@ -663,7 +661,7 @@ middle.block: ret i32 %12 } -define dso_local void @sbto16f32_256(<16 x i16> %a, <16 x float>* %res) "min-legal-vector-width"="256" { +define dso_local void @sbto16f32_256(<16 x i16> %a, ptr %res) "min-legal-vector-width"="256" { ; CHECK-LABEL: sbto16f32_256: ; CHECK: # %bb.0: ; CHECK-NEXT: vpmovw2m %ymm0, %k0 @@ -678,11 +676,11 @@ define dso_local void @sbto16f32_256(<16 x i16> %a, <16 x float>* %res) "min-leg ; CHECK-NEXT: retq %mask = icmp slt <16 x i16> %a, zeroinitializer %1 = sitofp <16 x i1> %mask to <16 x float> - store <16 x float> %1, <16 x float>* %res + store <16 x float> %1, ptr %res ret void } -define dso_local void @sbto16f32_512(<16 x i16> %a, <16 x float>* %res) "min-legal-vector-width"="512" { +define dso_local void @sbto16f32_512(<16 x i16> %a, ptr %res) "min-legal-vector-width"="512" { ; CHECK-LABEL: sbto16f32_512: ; CHECK: # %bb.0: ; CHECK-NEXT: vpmovw2m %ymm0, %k0 @@ -693,11 +691,11 @@ define dso_local void @sbto16f32_512(<16 x i16> %a, <16 x float>* %res) "min-leg ; CHECK-NEXT: retq %mask = icmp slt <16 x i16> %a, zeroinitializer %1 = sitofp <16 x i1> %mask to <16 x float> - store <16 x float> %1, <16 x float>* %res + store <16 x float> %1, ptr %res ret void } -define dso_local void @sbto16f64_256(<16 x i16> %a, <16 x double>* %res) "min-legal-vector-width"="256" { +define dso_local void @sbto16f64_256(<16 x i16> %a, ptr %res) "min-legal-vector-width"="256" { ; CHECK-LABEL: sbto16f64_256: ; CHECK: # %bb.0: ; CHECK-NEXT: vpmovw2m %ymm0, %k0 @@ -718,11 +716,11 @@ define dso_local void @sbto16f64_256(<16 x i16> %a, <16 x double>* %res) "min-l ; CHECK-NEXT: retq %mask = icmp slt <16 x i16> %a, zeroinitializer %1 = sitofp <16 x i1> %mask to <16 x double> - store <16 x double> %1, <16 x double>* %res + store <16 x double> %1, ptr %res ret void } -define dso_local void @sbto16f64_512(<16 x i16> %a, <16 x double>* %res) "min-legal-vector-width"="512" { +define dso_local void @sbto16f64_512(<16 x i16> %a, ptr %res) "min-legal-vector-width"="512" { ; CHECK-LABEL: sbto16f64_512: ; CHECK: # %bb.0: ; CHECK-NEXT: vpmovw2m %ymm0, %k0 @@ -736,11 +734,11 @@ define dso_local void @sbto16f64_512(<16 x i16> %a, <16 x double>* %res) "min-l ; CHECK-NEXT: retq %mask = icmp slt <16 x i16> %a, zeroinitializer %1 = sitofp <16 x i1> %mask to <16 x double> - store <16 x double> %1, <16 x double>* %res + store <16 x double> %1, ptr %res ret void } -define dso_local void @ubto16f32_256(<16 x i16> %a, <16 x float>* %res) "min-legal-vector-width"="256" { +define dso_local void @ubto16f32_256(<16 x i16> %a, ptr %res) "min-legal-vector-width"="256" { ; CHECK-LABEL: ubto16f32_256: ; CHECK: # %bb.0: ; CHECK-NEXT: vpmovw2m %ymm0, %k0 @@ -757,11 +755,11 @@ define dso_local void @ubto16f32_256(<16 
x i16> %a, <16 x float>* %res) "min-leg ; CHECK-NEXT: retq %mask = icmp slt <16 x i16> %a, zeroinitializer %1 = uitofp <16 x i1> %mask to <16 x float> - store <16 x float> %1, <16 x float>* %res + store <16 x float> %1, ptr %res ret void } -define dso_local void @ubto16f32_512(<16 x i16> %a, <16 x float>* %res) "min-legal-vector-width"="512" { +define dso_local void @ubto16f32_512(<16 x i16> %a, ptr %res) "min-legal-vector-width"="512" { ; CHECK-LABEL: ubto16f32_512: ; CHECK: # %bb.0: ; CHECK-NEXT: vpmovw2m %ymm0, %k0 @@ -773,11 +771,11 @@ define dso_local void @ubto16f32_512(<16 x i16> %a, <16 x float>* %res) "min-leg ; CHECK-NEXT: retq %mask = icmp slt <16 x i16> %a, zeroinitializer %1 = uitofp <16 x i1> %mask to <16 x float> - store <16 x float> %1, <16 x float>* %res + store <16 x float> %1, ptr %res ret void } -define dso_local void @ubto16f64_256(<16 x i16> %a, <16 x double>* %res) "min-legal-vector-width"="256" { +define dso_local void @ubto16f64_256(<16 x i16> %a, ptr %res) "min-legal-vector-width"="256" { ; CHECK-LABEL: ubto16f64_256: ; CHECK: # %bb.0: ; CHECK-NEXT: vpmovw2m %ymm0, %k0 @@ -800,11 +798,11 @@ define dso_local void @ubto16f64_256(<16 x i16> %a, <16 x double>* %res) "min-le ; CHECK-NEXT: retq %mask = icmp slt <16 x i16> %a, zeroinitializer %1 = uitofp <16 x i1> %mask to <16 x double> - store <16 x double> %1, <16 x double>* %res + store <16 x double> %1, ptr %res ret void } -define dso_local void @ubto16f64_512(<16 x i16> %a, <16 x double>* %res) "min-legal-vector-width"="512" { +define dso_local void @ubto16f64_512(<16 x i16> %a, ptr %res) "min-legal-vector-width"="512" { ; CHECK-LABEL: ubto16f64_512: ; CHECK: # %bb.0: ; CHECK-NEXT: vpmovw2m %ymm0, %k0 @@ -819,11 +817,11 @@ define dso_local void @ubto16f64_512(<16 x i16> %a, <16 x double>* %res) "min-le ; CHECK-NEXT: retq %mask = icmp slt <16 x i16> %a, zeroinitializer %1 = uitofp <16 x i1> %mask to <16 x double> - store <16 x double> %1, <16 x double>* %res + store <16 x double> %1, ptr %res ret void } -define <16 x i16> @test_16f32toub_256(<16 x float>* %ptr, <16 x i16> %passthru) "min-legal-vector-width"="256" { +define <16 x i16> @test_16f32toub_256(ptr %ptr, <16 x i16> %passthru) "min-legal-vector-width"="256" { ; CHECK-LABEL: test_16f32toub_256: ; CHECK: # %bb.0: ; CHECK-NEXT: vcvttps2dq (%rdi), %ymm1 @@ -835,13 +833,13 @@ define <16 x i16> @test_16f32toub_256(<16 x float>* %ptr, <16 x i16> %passthru) ; CHECK-NEXT: kunpckbw %k0, %k1, %k1 ; CHECK-NEXT: vmovdqu16 %ymm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq - %a = load <16 x float>, <16 x float>* %ptr + %a = load <16 x float>, ptr %ptr %mask = fptoui <16 x float> %a to <16 x i1> %select = select <16 x i1> %mask, <16 x i16> %passthru, <16 x i16> zeroinitializer ret <16 x i16> %select } -define <16 x i16> @test_16f32toub_512(<16 x float>* %ptr, <16 x i16> %passthru) "min-legal-vector-width"="512" { +define <16 x i16> @test_16f32toub_512(ptr %ptr, <16 x i16> %passthru) "min-legal-vector-width"="512" { ; CHECK-LABEL: test_16f32toub_512: ; CHECK: # %bb.0: ; CHECK-NEXT: vcvttps2dq (%rdi), %zmm1 @@ -849,13 +847,13 @@ define <16 x i16> @test_16f32toub_512(<16 x float>* %ptr, <16 x i16> %passthru) ; CHECK-NEXT: vpmovd2m %zmm1, %k1 ; CHECK-NEXT: vmovdqu16 %ymm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq - %a = load <16 x float>, <16 x float>* %ptr + %a = load <16 x float>, ptr %ptr %mask = fptoui <16 x float> %a to <16 x i1> %select = select <16 x i1> %mask, <16 x i16> %passthru, <16 x i16> zeroinitializer ret <16 x i16> %select } -define <16 x i16> @test_16f32tosb_256(<16 x float>* 
%ptr, <16 x i16> %passthru) "min-legal-vector-width"="256" { +define <16 x i16> @test_16f32tosb_256(ptr %ptr, <16 x i16> %passthru) "min-legal-vector-width"="256" { ; CHECK-LABEL: test_16f32tosb_256: ; CHECK: # %bb.0: ; CHECK-NEXT: vcvttps2dq (%rdi), %ymm1 @@ -865,26 +863,26 @@ define <16 x i16> @test_16f32tosb_256(<16 x float>* %ptr, <16 x i16> %passthru) ; CHECK-NEXT: kunpckbw %k0, %k1, %k1 ; CHECK-NEXT: vmovdqu16 %ymm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq - %a = load <16 x float>, <16 x float>* %ptr + %a = load <16 x float>, ptr %ptr %mask = fptosi <16 x float> %a to <16 x i1> %select = select <16 x i1> %mask, <16 x i16> %passthru, <16 x i16> zeroinitializer ret <16 x i16> %select } -define <16 x i16> @test_16f32tosb_512(<16 x float>* %ptr, <16 x i16> %passthru) "min-legal-vector-width"="512" { +define <16 x i16> @test_16f32tosb_512(ptr %ptr, <16 x i16> %passthru) "min-legal-vector-width"="512" { ; CHECK-LABEL: test_16f32tosb_512: ; CHECK: # %bb.0: ; CHECK-NEXT: vcvttps2dq (%rdi), %zmm1 ; CHECK-NEXT: vpmovd2m %zmm1, %k1 ; CHECK-NEXT: vmovdqu16 %ymm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq - %a = load <16 x float>, <16 x float>* %ptr + %a = load <16 x float>, ptr %ptr %mask = fptosi <16 x float> %a to <16 x i1> %select = select <16 x i1> %mask, <16 x i16> %passthru, <16 x i16> zeroinitializer ret <16 x i16> %select } -define dso_local void @mul256(<64 x i8>* %a, <64 x i8>* %b, <64 x i8>* %c) "min-legal-vector-width"="256" { +define dso_local void @mul256(ptr %a, ptr %b, ptr %c) "min-legal-vector-width"="256" { ; CHECK-SKX-VBMI-LABEL: mul256: ; CHECK-SKX-VBMI: # %bb.0: ; CHECK-SKX-VBMI-NEXT: vmovdqa (%rdi), %ymm0 @@ -966,14 +964,14 @@ define dso_local void @mul256(<64 x i8>* %a, <64 x i8>* %b, <64 x i8>* %c) "min- ; CHECK-VBMI-NEXT: vmovdqa %ymm1, 32(%rdx) ; CHECK-VBMI-NEXT: vzeroupper ; CHECK-VBMI-NEXT: retq - %d = load <64 x i8>, <64 x i8>* %a - %e = load <64 x i8>, <64 x i8>* %b + %d = load <64 x i8>, ptr %a + %e = load <64 x i8>, ptr %b %f = mul <64 x i8> %d, %e - store <64 x i8> %f, <64 x i8>* %c + store <64 x i8> %f, ptr %c ret void } -define dso_local void @mul512(<64 x i8>* %a, <64 x i8>* %b, <64 x i8>* %c) "min-legal-vector-width"="512" { +define dso_local void @mul512(ptr %a, ptr %b, ptr %c) "min-legal-vector-width"="512" { ; CHECK-SKX-VBMI-LABEL: mul512: ; CHECK-SKX-VBMI: # %bb.0: ; CHECK-SKX-VBMI-NEXT: vmovdqa64 (%rdi), %zmm0 @@ -1023,27 +1021,27 @@ define dso_local void @mul512(<64 x i8>* %a, <64 x i8>* %b, <64 x i8>* %c) "min- ; CHECK-VBMI-NEXT: vmovdqa64 %zmm1, (%rdx) ; CHECK-VBMI-NEXT: vzeroupper ; CHECK-VBMI-NEXT: retq - %d = load <64 x i8>, <64 x i8>* %a - %e = load <64 x i8>, <64 x i8>* %b + %d = load <64 x i8>, ptr %a + %e = load <64 x i8>, ptr %b %f = mul <64 x i8> %d, %e - store <64 x i8> %f, <64 x i8>* %c + store <64 x i8> %f, ptr %c ret void } ; This threw an assertion at one point. 
-define <4 x i32> @mload_v4i32(<4 x i32> %trigger, <4 x i32>* %addr, <4 x i32> %dst) "min-legal-vector-width"="256" { +define <4 x i32> @mload_v4i32(<4 x i32> %trigger, ptr %addr, <4 x i32> %dst) "min-legal-vector-width"="256" { ; CHECK-LABEL: mload_v4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmd %xmm0, %xmm0, %k1 ; CHECK-NEXT: vpblendmd (%rdi), %xmm1, %xmm0 {%k1} ; CHECK-NEXT: retq %mask = icmp eq <4 x i32> %trigger, zeroinitializer - %res = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst) + %res = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst) ret <4 x i32> %res } -declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32, <4 x i1>, <4 x i32>) +declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32, <4 x i1>, <4 x i32>) -define <16 x i32> @trunc_v16i64_v16i32(<16 x i64>* %x) nounwind "min-legal-vector-width"="256" { +define <16 x i32> @trunc_v16i64_v16i32(ptr %x) nounwind "min-legal-vector-width"="256" { ; CHECK-LABEL: trunc_v16i64_v16i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovdqa (%rdi), %ymm0 @@ -1057,12 +1055,12 @@ define <16 x i32> @trunc_v16i64_v16i32(<16 x i64>* %x) nounwind "min-legal-vecto ; CHECK-NEXT: vpmovqd %ymm3, %xmm2 ; CHECK-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 ; CHECK-NEXT: retq - %a = load <16 x i64>, <16 x i64>* %x + %a = load <16 x i64>, ptr %x %b = trunc <16 x i64> %a to <16 x i32> ret <16 x i32> %b } -define <16 x i8> @trunc_v16i64_v16i8(<16 x i64>* %x) nounwind "min-legal-vector-width"="256" { +define <16 x i8> @trunc_v16i64_v16i8(ptr %x) nounwind "min-legal-vector-width"="256" { ; CHECK-LABEL: trunc_v16i64_v16i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovdqa (%rdi), %ymm0 @@ -1078,12 +1076,12 @@ define <16 x i8> @trunc_v16i64_v16i8(<16 x i64>* %x) nounwind "min-legal-vector- ; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq - %a = load <16 x i64>, <16 x i64>* %x + %a = load <16 x i64>, ptr %x %b = trunc <16 x i64> %a to <16 x i8> ret <16 x i8> %b } -define <16 x i8> @trunc_v16i32_v16i8(<16 x i32>* %x) nounwind "min-legal-vector-width"="256" { +define <16 x i8> @trunc_v16i32_v16i8(ptr %x) nounwind "min-legal-vector-width"="256" { ; CHECK-LABEL: trunc_v16i32_v16i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovdqa (%rdi), %ymm0 @@ -1093,12 +1091,12 @@ define <16 x i8> @trunc_v16i32_v16i8(<16 x i32>* %x) nounwind "min-legal-vector- ; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq - %a = load <16 x i32>, <16 x i32>* %x + %a = load <16 x i32>, ptr %x %b = trunc <16 x i32> %a to <16 x i8> ret <16 x i8> %b } -define <8 x i8> @trunc_v8i64_v8i8(<8 x i64>* %x) nounwind "min-legal-vector-width"="256" { +define <8 x i8> @trunc_v8i64_v8i8(ptr %x) nounwind "min-legal-vector-width"="256" { ; CHECK-LABEL: trunc_v8i64_v8i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovdqa (%rdi), %ymm0 @@ -1108,12 +1106,12 @@ define <8 x i8> @trunc_v8i64_v8i8(<8 x i64>* %x) nounwind "min-legal-vector-widt ; CHECK-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq - %a = load <8 x i64>, <8 x i64>* %x + %a = load <8 x i64>, ptr %x %b = trunc <8 x i64> %a to <8 x i8> ret <8 x i8> %b } -define <8 x i16> @trunc_v8i64_v8i16(<8 x i64>* %x) nounwind "min-legal-vector-width"="256" { +define <8 x i16> @trunc_v8i64_v8i16(ptr %x) nounwind "min-legal-vector-width"="256" { ; CHECK-LABEL: trunc_v8i64_v8i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovdqa (%rdi), %ymm0 @@ 
-1123,12 +1121,12 @@ define <8 x i16> @trunc_v8i64_v8i16(<8 x i64>* %x) nounwind "min-legal-vector-wi ; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq - %a = load <8 x i64>, <8 x i64>* %x + %a = load <8 x i64>, ptr %x %b = trunc <8 x i64> %a to <8 x i16> ret <8 x i16> %b } -define <8 x i32> @trunc_v8i64_v8i32_zeroes(<8 x i64>* %x) nounwind "min-legal-vector-width"="256" { +define <8 x i32> @trunc_v8i64_v8i32_zeroes(ptr %x) nounwind "min-legal-vector-width"="256" { ; CHECK-LABEL: trunc_v8i64_v8i32_zeroes: ; CHECK: # %bb.0: ; CHECK-NEXT: vpsrlq $48, 32(%rdi), %ymm0 @@ -1136,26 +1134,26 @@ define <8 x i32> @trunc_v8i64_v8i32_zeroes(<8 x i64>* %x) nounwind "min-legal-ve ; CHECK-NEXT: vpackusdw %ymm0, %ymm1, %ymm0 ; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] ; CHECK-NEXT: retq - %a = load <8 x i64>, <8 x i64>* %x + %a = load <8 x i64>, ptr %x %b = lshr <8 x i64> %a, %c = trunc <8 x i64> %b to <8 x i32> ret <8 x i32> %c } -define <16 x i16> @trunc_v16i32_v16i16_zeroes(<16 x i32>* %x) nounwind "min-legal-vector-width"="256" { +define <16 x i16> @trunc_v16i32_v16i16_zeroes(ptr %x) nounwind "min-legal-vector-width"="256" { ; CHECK-LABEL: trunc_v16i32_v16i16_zeroes: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovdqa (%rdi), %ymm1 ; CHECK-NEXT: vmovdqa {{.*#+}} ymm0 = [1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31] ; CHECK-NEXT: vpermi2w 32(%rdi), %ymm1, %ymm0 ; CHECK-NEXT: retq - %a = load <16 x i32>, <16 x i32>* %x + %a = load <16 x i32>, ptr %x %b = lshr <16 x i32> %a, %c = trunc <16 x i32> %b to <16 x i16> ret <16 x i16> %c } -define <32 x i8> @trunc_v32i16_v32i8_zeroes(<32 x i16>* %x) nounwind "min-legal-vector-width"="256" { +define <32 x i8> @trunc_v32i16_v32i8_zeroes(ptr %x) nounwind "min-legal-vector-width"="256" { ; CHECK-SKX-VBMI-LABEL: trunc_v32i16_v32i8_zeroes: ; CHECK-SKX-VBMI: # %bb.0: ; CHECK-SKX-VBMI-NEXT: vmovdqa (%rdi), %ymm1 @@ -1177,13 +1175,13 @@ define <32 x i8> @trunc_v32i16_v32i8_zeroes(<32 x i16>* %x) nounwind "min-legal- ; CHECK-VBMI-NEXT: vmovdqa {{.*#+}} ymm0 = [1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31,33,35,37,39,41,43,45,47,49,51,53,55,57,59,61,63] ; CHECK-VBMI-NEXT: vpermi2b 32(%rdi), %ymm1, %ymm0 ; CHECK-VBMI-NEXT: retq - %a = load <32 x i16>, <32 x i16>* %x + %a = load <32 x i16>, ptr %x %b = lshr <32 x i16> %a, %c = trunc <32 x i16> %b to <32 x i8> ret <32 x i8> %c } -define <8 x i32> @trunc_v8i64_v8i32_sign(<8 x i64>* %x) nounwind "min-legal-vector-width"="256" { +define <8 x i32> @trunc_v8i64_v8i32_sign(ptr %x) nounwind "min-legal-vector-width"="256" { ; CHECK-LABEL: trunc_v8i64_v8i32_sign: ; CHECK: # %bb.0: ; CHECK-NEXT: vpsraq $48, 32(%rdi), %ymm0 @@ -1191,26 +1189,26 @@ define <8 x i32> @trunc_v8i64_v8i32_sign(<8 x i64>* %x) nounwind "min-legal-vect ; CHECK-NEXT: vpackssdw %ymm0, %ymm1, %ymm0 ; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] ; CHECK-NEXT: retq - %a = load <8 x i64>, <8 x i64>* %x + %a = load <8 x i64>, ptr %x %b = ashr <8 x i64> %a, %c = trunc <8 x i64> %b to <8 x i32> ret <8 x i32> %c } -define <16 x i16> @trunc_v16i32_v16i16_sign(<16 x i32>* %x) nounwind "min-legal-vector-width"="256" { +define <16 x i16> @trunc_v16i32_v16i16_sign(ptr %x) nounwind "min-legal-vector-width"="256" { ; CHECK-LABEL: trunc_v16i32_v16i16_sign: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovdqa (%rdi), %ymm1 ; CHECK-NEXT: vmovdqa {{.*#+}} ymm0 = [1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31] ; CHECK-NEXT: vpermi2w 32(%rdi), %ymm1, %ymm0 ; CHECK-NEXT: retq - %a = load <16 x i32>, <16 x i32>* %x + %a = load <16 x i32>, ptr %x %b = 
ashr <16 x i32> %a, %c = trunc <16 x i32> %b to <16 x i16> ret <16 x i16> %c } -define <32 x i8> @trunc_v32i16_v32i8_sign(<32 x i16>* %x) nounwind "min-legal-vector-width"="256" { +define <32 x i8> @trunc_v32i16_v32i8_sign(ptr %x) nounwind "min-legal-vector-width"="256" { ; CHECK-SKX-VBMI-LABEL: trunc_v32i16_v32i8_sign: ; CHECK-SKX-VBMI: # %bb.0: ; CHECK-SKX-VBMI-NEXT: vmovdqa (%rdi), %ymm1 @@ -1232,13 +1230,13 @@ define <32 x i8> @trunc_v32i16_v32i8_sign(<32 x i16>* %x) nounwind "min-legal-ve ; CHECK-VBMI-NEXT: vmovdqa {{.*#+}} ymm0 = [1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31,33,35,37,39,41,43,45,47,49,51,53,55,57,59,61,63] ; CHECK-VBMI-NEXT: vpermi2b 32(%rdi), %ymm1, %ymm0 ; CHECK-VBMI-NEXT: retq - %a = load <32 x i16>, <32 x i16>* %x + %a = load <32 x i16>, ptr %x %b = ashr <32 x i16> %a, %c = trunc <32 x i16> %b to <32 x i8> ret <32 x i8> %c } -define dso_local void @zext_v16i8_v16i64(<16 x i8> %x, <16 x i64>* %y) nounwind "min-legal-vector-width"="256" { +define dso_local void @zext_v16i8_v16i64(<16 x i8> %x, ptr %y) nounwind "min-legal-vector-width"="256" { ; CHECK-LABEL: zext_v16i8_v16i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero @@ -1256,11 +1254,11 @@ define dso_local void @zext_v16i8_v16i64(<16 x i8> %x, <16 x i64>* %y) nounwind ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq %a = zext <16 x i8> %x to <16 x i64> - store <16 x i64> %a, <16 x i64>* %y + store <16 x i64> %a, ptr %y ret void } -define dso_local void @sext_v16i8_v16i64(<16 x i8> %x, <16 x i64>* %y) nounwind "min-legal-vector-width"="256" { +define dso_local void @sext_v16i8_v16i64(<16 x i8> %x, ptr %y) nounwind "min-legal-vector-width"="256" { ; CHECK-LABEL: sext_v16i8_v16i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vpmovsxbw %xmm0, %ymm1 @@ -1278,11 +1276,11 @@ define dso_local void @sext_v16i8_v16i64(<16 x i8> %x, <16 x i64>* %y) nounwind ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq %a = sext <16 x i8> %x to <16 x i64> - store <16 x i64> %a, <16 x i64>* %y + store <16 x i64> %a, ptr %y ret void } -define dso_local void @vselect_split_v8i16_setcc(<8 x i16> %s, <8 x i16> %t, <8 x i64>* %p, <8 x i64>* %q, <8 x i64>* %r) "min-legal-vector-width"="256" { +define dso_local void @vselect_split_v8i16_setcc(<8 x i16> %s, <8 x i16> %t, ptr %p, ptr %q, ptr %r) "min-legal-vector-width"="256" { ; CHECK-LABEL: vselect_split_v8i16_setcc: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovdqa (%rsi), %ymm2 @@ -1295,15 +1293,15 @@ define dso_local void @vselect_split_v8i16_setcc(<8 x i16> %s, <8 x i16> %t, <8 ; CHECK-NEXT: vmovdqa %ymm3, 32(%rdx) ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq - %x = load <8 x i64>, <8 x i64>* %p - %y = load <8 x i64>, <8 x i64>* %q + %x = load <8 x i64>, ptr %p + %y = load <8 x i64>, ptr %q %a = icmp eq <8 x i16> %s, %t %b = select <8 x i1> %a, <8 x i64> %x, <8 x i64> %y - store <8 x i64> %b, <8 x i64>* %r + store <8 x i64> %b, ptr %r ret void } -define dso_local void @vselect_split_v8i32_setcc(<8 x i32> %s, <8 x i32> %t, <8 x i64>* %p, <8 x i64>* %q, <8 x i64>* %r) "min-legal-vector-width"="256" { +define dso_local void @vselect_split_v8i32_setcc(<8 x i32> %s, <8 x i32> %t, ptr %p, ptr %q, ptr %r) "min-legal-vector-width"="256" { ; CHECK-LABEL: vselect_split_v8i32_setcc: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovdqa (%rsi), %ymm2 @@ -1316,15 +1314,15 @@ define dso_local void @vselect_split_v8i32_setcc(<8 x 
i32> %s, <8 x i32> %t, <8 ; CHECK-NEXT: vmovdqa %ymm3, 32(%rdx) ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq - %x = load <8 x i64>, <8 x i64>* %p - %y = load <8 x i64>, <8 x i64>* %q + %x = load <8 x i64>, ptr %p + %y = load <8 x i64>, ptr %q %a = icmp eq <8 x i32> %s, %t %b = select <8 x i1> %a, <8 x i64> %x, <8 x i64> %y - store <8 x i64> %b, <8 x i64>* %r + store <8 x i64> %b, ptr %r ret void } -define dso_local void @vselect_split_v16i8_setcc(<16 x i8> %s, <16 x i8> %t, <16 x i32>* %p, <16 x i32>* %q, <16 x i32>* %r) "min-legal-vector-width"="256" { +define dso_local void @vselect_split_v16i8_setcc(<16 x i8> %s, <16 x i8> %t, ptr %p, ptr %q, ptr %r) "min-legal-vector-width"="256" { ; CHECK-LABEL: vselect_split_v16i8_setcc: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovdqa (%rsi), %ymm2 @@ -1337,15 +1335,15 @@ define dso_local void @vselect_split_v16i8_setcc(<16 x i8> %s, <16 x i8> %t, <16 ; CHECK-NEXT: vmovdqa %ymm3, 32(%rdx) ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq - %x = load <16 x i32>, <16 x i32>* %p - %y = load <16 x i32>, <16 x i32>* %q + %x = load <16 x i32>, ptr %p + %y = load <16 x i32>, ptr %q %a = icmp eq <16 x i8> %s, %t %b = select <16 x i1> %a, <16 x i32> %x, <16 x i32> %y - store <16 x i32> %b, <16 x i32>* %r + store <16 x i32> %b, ptr %r ret void } -define dso_local void @vselect_split_v16i16_setcc(<16 x i16> %s, <16 x i16> %t, <16 x i32>* %p, <16 x i32>* %q, <16 x i32>* %r) "min-legal-vector-width"="256" { +define dso_local void @vselect_split_v16i16_setcc(<16 x i16> %s, <16 x i16> %t, ptr %p, ptr %q, ptr %r) "min-legal-vector-width"="256" { ; CHECK-LABEL: vselect_split_v16i16_setcc: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovdqa (%rsi), %ymm2 @@ -1358,15 +1356,15 @@ define dso_local void @vselect_split_v16i16_setcc(<16 x i16> %s, <16 x i16> %t, ; CHECK-NEXT: vmovdqa %ymm3, 32(%rdx) ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq - %x = load <16 x i32>, <16 x i32>* %p - %y = load <16 x i32>, <16 x i32>* %q + %x = load <16 x i32>, ptr %p + %y = load <16 x i32>, ptr %q %a = icmp eq <16 x i16> %s, %t %b = select <16 x i1> %a, <16 x i32> %x, <16 x i32> %y - store <16 x i32> %b, <16 x i32>* %r + store <16 x i32> %b, ptr %r ret void } -define <16 x i8> @trunc_packus_v16i32_v16i8(<16 x i32>* %p) "min-legal-vector-width"="256" { +define <16 x i8> @trunc_packus_v16i32_v16i8(ptr %p) "min-legal-vector-width"="256" { ; CHECK-LABEL: trunc_packus_v16i32_v16i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovdqa (%rdi), %ymm0 @@ -1375,7 +1373,7 @@ define <16 x i8> @trunc_packus_v16i32_v16i8(<16 x i32>* %p) "min-legal-vector-wi ; CHECK-NEXT: vpmovuswb %ymm0, %xmm0 ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq - %a = load <16 x i32>, <16 x i32>* %p + %a = load <16 x i32>, ptr %p %b = icmp slt <16 x i32> %a, %c = select <16 x i1> %b, <16 x i32> %a, <16 x i32> %d = icmp sgt <16 x i32> %c, zeroinitializer @@ -1384,7 +1382,7 @@ define <16 x i8> @trunc_packus_v16i32_v16i8(<16 x i32>* %p) "min-legal-vector-wi ret <16 x i8> %f } -define dso_local void @trunc_packus_v16i32_v16i8_store(<16 x i32>* %p, <16 x i8>* %q) "min-legal-vector-width"="256" { +define dso_local void @trunc_packus_v16i32_v16i8_store(ptr %p, ptr %q) "min-legal-vector-width"="256" { ; CHECK-LABEL: trunc_packus_v16i32_v16i8_store: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovdqa (%rdi), %ymm0 @@ -1393,13 +1391,13 @@ define dso_local void @trunc_packus_v16i32_v16i8_store(<16 x i32>* %p, <16 x i8> ; CHECK-NEXT: vpmovuswb %ymm0, (%rsi) ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq - %a = load <16 x i32>, <16 x i32>* %p + %a = load <16 x i32>, ptr %p %b = icmp slt <16 x 
i32> %a, %c = select <16 x i1> %b, <16 x i32> %a, <16 x i32> %d = icmp sgt <16 x i32> %c, zeroinitializer %e = select <16 x i1> %d, <16 x i32> %c, <16 x i32> zeroinitializer %f = trunc <16 x i32> %e to <16 x i8> - store <16 x i8> %f, <16 x i8>* %q + store <16 x i8> %f, ptr %q ret void } @@ -1410,7 +1408,7 @@ define <64 x i1> @v64i1_argument_return(<64 x i1> %x) "min-legal-vector-width"=" ret <64 x i1> %x } -define dso_local void @v64i1_shuffle(<64 x i8>* %x, <64 x i8>* %y) "min-legal-vector-width"="256" { +define dso_local void @v64i1_shuffle(ptr %x, ptr %y) "min-legal-vector-width"="256" { ; CHECK-LABEL: v64i1_shuffle: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmovdqa (%rdi), %ymm1 @@ -1859,13 +1857,13 @@ define dso_local void @v64i1_shuffle(<64 x i8>* %x, <64 x i8>* %y) "min-legal-ve ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq entry: - %a = load <64 x i8>, <64 x i8>* %x + %a = load <64 x i8>, ptr %x %b = icmp eq <64 x i8> %a, zeroinitializer %shuf = shufflevector <64 x i1> %b, <64 x i1> undef, <64 x i32> - call void @llvm.masked.store.v64i8.p0v64i8(<64 x i8> %a, <64 x i8>* %y, i32 1, <64 x i1> %shuf) + call void @llvm.masked.store.v64i8.p0(<64 x i8> %a, ptr %y, i32 1, <64 x i1> %shuf) ret void } -declare void @llvm.masked.store.v64i8.p0v64i8(<64 x i8>, <64 x i8>*, i32, <64 x i1>) +declare void @llvm.masked.store.v64i8.p0(<64 x i8>, ptr, i32, <64 x i1>) @mem64_dst = dso_local global i64 0, align 8 @mem64_src = dso_local global i64 0, align 8 @@ -1879,14 +1877,14 @@ define dso_local i32 @v64i1_inline_asm() "min-legal-vector-width"="256" { ; CHECK-NEXT: movl -{{[0-9]+}}(%rsp), %eax ; CHECK-NEXT: retq %1 = alloca i32, align 4 - %2 = load i64, i64* @mem64_src, align 8 + %2 = load i64, ptr @mem64_src, align 8 %3 = call i64 asm "", "=k,k,~{dirflag},~{fpsr},~{flags}"(i64 %2) - store i64 %3, i64* @mem64_dst, align 8 - %4 = load i32, i32* %1, align 4 + store i64 %3, ptr @mem64_dst, align 8 + %4 = load i32, ptr %1, align 4 ret i32 %4 } -define dso_local void @cmp_v8i64_sext(<8 x i64>* %xptr, <8 x i64>* %yptr, <8 x i64>* %zptr) "min-legal-vector-width"="256" { +define dso_local void @cmp_v8i64_sext(ptr %xptr, ptr %yptr, ptr %zptr) "min-legal-vector-width"="256" { ; CHECK-LABEL: cmp_v8i64_sext: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovdqa (%rsi), %ymm0 @@ -1897,15 +1895,15 @@ define dso_local void @cmp_v8i64_sext(<8 x i64>* %xptr, <8 x i64>* %yptr, <8 x i ; CHECK-NEXT: vmovdqa %ymm1, 32(%rdx) ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq - %x = load <8 x i64>, <8 x i64>* %xptr - %y = load <8 x i64>, <8 x i64>* %yptr + %x = load <8 x i64>, ptr %xptr + %y = load <8 x i64>, ptr %yptr %cmp = icmp slt <8 x i64> %x, %y %ext = sext <8 x i1> %cmp to <8 x i64> - store <8 x i64> %ext, <8 x i64>* %zptr + store <8 x i64> %ext, ptr %zptr ret void } -define dso_local void @cmp_v8i64_zext(<8 x i64>* %xptr, <8 x i64>* %yptr, <8 x i64>* %zptr) "min-legal-vector-width"="256" { +define dso_local void @cmp_v8i64_zext(ptr %xptr, ptr %yptr, ptr %zptr) "min-legal-vector-width"="256" { ; CHECK-LABEL: cmp_v8i64_zext: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovdqa (%rsi), %ymm0 @@ -1918,11 +1916,11 @@ define dso_local void @cmp_v8i64_zext(<8 x i64>* %xptr, <8 x i64>* %yptr, <8 x i ; CHECK-NEXT: vmovdqa %ymm1, 32(%rdx) ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq - %x = load <8 x i64>, <8 x i64>* %xptr - %y = load <8 x i64>, <8 x i64>* %yptr + %x = load <8 x i64>, ptr %xptr + %y = load <8 x i64>, ptr %yptr %cmp = icmp slt <8 x i64> %x, %y %ext = zext <8 x i1> %cmp to <8 x i64> - store <8 x i64> %ext, <8 x i64>* %zptr + store <8 x i64> %ext, 
ptr %zptr ret void } diff --git a/llvm/test/CodeGen/X86/sad.ll b/llvm/test/CodeGen/X86/sad.ll index cbe63a7c7b456..c0cc371fcb75f 100644 --- a/llvm/test/CodeGen/X86/sad.ll +++ b/llvm/test/CodeGen/X86/sad.ll @@ -1,11 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; ifndef INTEL_SYCL_OPAQUEPOINTER_READY ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX512,AVX512F ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefixes=AVX,AVX512,AVX512BW -; end @a = dso_local global [1024 x i8] zeroinitializer, align 16 @b = dso_local global [1024 x i8] zeroinitializer, align 16 @@ -119,13 +117,13 @@ entry: vector.body: %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] %vec.phi = phi <16 x i32> [ zeroinitializer, %entry ], [ %10, %vector.body ] - %0 = getelementptr inbounds [1024 x i8], [1024 x i8]* @a, i64 0, i64 %index - %1 = bitcast i8* %0 to <16 x i8>* - %wide.load = load <16 x i8>, <16 x i8>* %1, align 4 + %0 = getelementptr inbounds [1024 x i8], ptr @a, i64 0, i64 %index + %1 = bitcast ptr %0 to ptr + %wide.load = load <16 x i8>, ptr %1, align 4 %2 = zext <16 x i8> %wide.load to <16 x i32> - %3 = getelementptr inbounds [1024 x i8], [1024 x i8]* @b, i64 0, i64 %index - %4 = bitcast i8* %3 to <16 x i8>* - %wide.load1 = load <16 x i8>, <16 x i8>* %4, align 4 + %3 = getelementptr inbounds [1024 x i8], ptr @b, i64 0, i64 %index + %4 = bitcast ptr %3 to ptr + %wide.load1 = load <16 x i8>, ptr %4, align 4 %5 = zext <16 x i8> %wide.load1 to <16 x i32> %6 = sub nsw <16 x i32> %2, %5 %7 = icmp sgt <16 x i32> %6, @@ -276,13 +274,13 @@ entry: vector.body: %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] %vec.phi = phi <32 x i32> [ zeroinitializer, %entry ], [ %10, %vector.body ] - %0 = getelementptr inbounds [1024 x i8], [1024 x i8]* @a, i64 0, i64 %index - %1 = bitcast i8* %0 to <32 x i8>* - %wide.load = load <32 x i8>, <32 x i8>* %1, align 32 + %0 = getelementptr inbounds [1024 x i8], ptr @a, i64 0, i64 %index + %1 = bitcast ptr %0 to ptr + %wide.load = load <32 x i8>, ptr %1, align 32 %2 = zext <32 x i8> %wide.load to <32 x i32> - %3 = getelementptr inbounds [1024 x i8], [1024 x i8]* @b, i64 0, i64 %index - %4 = bitcast i8* %3 to <32 x i8>* - %wide.load1 = load <32 x i8>, <32 x i8>* %4, align 32 + %3 = getelementptr inbounds [1024 x i8], ptr @b, i64 0, i64 %index + %4 = bitcast ptr %3 to ptr + %wide.load1 = load <32 x i8>, ptr %4, align 32 %5 = zext <32 x i8> %wide.load1 to <32 x i32> %6 = sub nsw <32 x i32> %2, %5 %7 = icmp sgt <32 x i32> %6, @@ -507,13 +505,13 @@ entry: vector.body: %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] %vec.phi = phi <64 x i32> [ zeroinitializer, %entry ], [ %10, %vector.body ] - %0 = getelementptr inbounds [1024 x i8], [1024 x i8]* @a, i64 0, i64 %index - %1 = bitcast i8* %0 to <64 x i8>* - %wide.load = load <64 x i8>, <64 x i8>* %1, align 64 + %0 = getelementptr inbounds [1024 x i8], ptr @a, i64 0, i64 %index + %1 = bitcast ptr %0 to ptr + %wide.load = load <64 x i8>, ptr %1, align 64 %2 = zext <64 x i8> %wide.load to <64 x i32> - %3 = getelementptr inbounds [1024 x i8], [1024 x i8]* @b, i64 0, i64 %index - %4 = 
bitcast i8* %3 to <64 x i8>* - %wide.load1 = load <64 x i8>, <64 x i8>* %4, align 64 + %3 = getelementptr inbounds [1024 x i8], ptr @b, i64 0, i64 %index + %4 = bitcast ptr %3 to ptr + %wide.load1 = load <64 x i8>, ptr %4, align 64 %5 = zext <64 x i8> %wide.load1 to <64 x i32> %6 = sub nsw <64 x i32> %2, %5 %7 = icmp sgt <64 x i32> %6, @@ -591,13 +589,13 @@ entry: vector.body: %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] %vec.phi = phi <2 x i32> [ zeroinitializer, %entry ], [ %10, %vector.body ] - %0 = getelementptr inbounds [1024 x i8], [1024 x i8]* @a, i64 0, i64 %index - %1 = bitcast i8* %0 to <2 x i8>* - %wide.load = load <2 x i8>, <2 x i8>* %1, align 4 + %0 = getelementptr inbounds [1024 x i8], ptr @a, i64 0, i64 %index + %1 = bitcast ptr %0 to ptr + %wide.load = load <2 x i8>, ptr %1, align 4 %2 = zext <2 x i8> %wide.load to <2 x i32> - %3 = getelementptr inbounds [1024 x i8], [1024 x i8]* @b, i64 0, i64 %index - %4 = bitcast i8* %3 to <2 x i8>* - %wide.load1 = load <2 x i8>, <2 x i8>* %4, align 4 + %3 = getelementptr inbounds [1024 x i8], ptr @b, i64 0, i64 %index + %4 = bitcast ptr %3 to ptr + %wide.load1 = load <2 x i8>, ptr %4, align 4 %5 = zext <2 x i8> %wide.load1 to <2 x i32> %6 = sub nsw <2 x i32> %2, %5 %7 = icmp sgt <2 x i32> %6, @@ -663,13 +661,13 @@ entry: vector.body: %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] %vec.phi = phi <4 x i32> [ zeroinitializer, %entry ], [ %10, %vector.body ] - %0 = getelementptr inbounds [1024 x i8], [1024 x i8]* @a, i64 0, i64 %index - %1 = bitcast i8* %0 to <4 x i8>* - %wide.load = load <4 x i8>, <4 x i8>* %1, align 4 + %0 = getelementptr inbounds [1024 x i8], ptr @a, i64 0, i64 %index + %1 = bitcast ptr %0 to ptr + %wide.load = load <4 x i8>, ptr %1, align 4 %2 = zext <4 x i8> %wide.load to <4 x i32> - %3 = getelementptr inbounds [1024 x i8], [1024 x i8]* @b, i64 0, i64 %index - %4 = bitcast i8* %3 to <4 x i8>* - %wide.load1 = load <4 x i8>, <4 x i8>* %4, align 4 + %3 = getelementptr inbounds [1024 x i8], ptr @b, i64 0, i64 %index + %4 = bitcast ptr %3 to ptr + %wide.load1 = load <4 x i8>, ptr %4, align 4 %5 = zext <4 x i8> %wide.load1 to <4 x i32> %6 = sub nsw <4 x i32> %2, %5 %7 = icmp sgt <4 x i32> %6, @@ -690,7 +688,7 @@ middle.block: } -define dso_local i32 @sad_nonloop_4i8(<4 x i8>* nocapture readonly %p, i64, <4 x i8>* nocapture readonly %q) local_unnamed_addr #0 { +define dso_local i32 @sad_nonloop_4i8(ptr nocapture readonly %p, i64, ptr nocapture readonly %q) local_unnamed_addr #0 { ; SSE2-LABEL: sad_nonloop_4i8: ; SSE2: # %bb.0: ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero @@ -706,9 +704,9 @@ define dso_local i32 @sad_nonloop_4i8(<4 x i8>* nocapture readonly %p, i64, <4 x ; AVX-NEXT: vpsadbw %xmm0, %xmm1, %xmm0 ; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: retq - %v1 = load <4 x i8>, <4 x i8>* %p, align 1 + %v1 = load <4 x i8>, ptr %p, align 1 %z1 = zext <4 x i8> %v1 to <4 x i32> - %v2 = load <4 x i8>, <4 x i8>* %q, align 1 + %v2 = load <4 x i8>, ptr %q, align 1 %z2 = zext <4 x i8> %v2 to <4 x i32> %sub = sub nsw <4 x i32> %z1, %z2 %isneg = icmp sgt <4 x i32> %sub, @@ -722,7 +720,7 @@ define dso_local i32 @sad_nonloop_4i8(<4 x i8>* nocapture readonly %p, i64, <4 x ret i32 %sum } -define dso_local i32 @sad_nonloop_8i8(<8 x i8>* nocapture readonly %p, i64, <8 x i8>* nocapture readonly %q) local_unnamed_addr #0 { +define dso_local i32 @sad_nonloop_8i8(ptr nocapture readonly %p, i64, ptr nocapture readonly %q) local_unnamed_addr #0 { ; SSE2-LABEL: sad_nonloop_8i8: ; SSE2: # %bb.0: ; SSE2-NEXT: 
movq {{.*#+}} xmm0 = mem[0],zero @@ -738,9 +736,9 @@ define dso_local i32 @sad_nonloop_8i8(<8 x i8>* nocapture readonly %p, i64, <8 x ; AVX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: retq - %v1 = load <8 x i8>, <8 x i8>* %p, align 1 + %v1 = load <8 x i8>, ptr %p, align 1 %z1 = zext <8 x i8> %v1 to <8 x i32> - %v2 = load <8 x i8>, <8 x i8>* %q, align 1 + %v2 = load <8 x i8>, ptr %q, align 1 %z2 = zext <8 x i8> %v2 to <8 x i32> %sub = sub nsw <8 x i32> %z1, %z2 %isneg = icmp sgt <8 x i32> %sub, @@ -756,7 +754,7 @@ define dso_local i32 @sad_nonloop_8i8(<8 x i8>* nocapture readonly %p, i64, <8 x ret i32 %sum } -define dso_local i32 @sad_nonloop_16i8(<16 x i8>* nocapture readonly %p, i64, <16 x i8>* nocapture readonly %q) local_unnamed_addr #0 { +define dso_local i32 @sad_nonloop_16i8(ptr nocapture readonly %p, i64, ptr nocapture readonly %q) local_unnamed_addr #0 { ; SSE2-LABEL: sad_nonloop_16i8: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqu (%rdi), %xmm0 @@ -775,9 +773,9 @@ define dso_local i32 @sad_nonloop_16i8(<16 x i8>* nocapture readonly %p, i64, <1 ; AVX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: retq - %v1 = load <16 x i8>, <16 x i8>* %p, align 1 + %v1 = load <16 x i8>, ptr %p, align 1 %z1 = zext <16 x i8> %v1 to <16 x i32> - %v2 = load <16 x i8>, <16 x i8>* %q, align 1 + %v2 = load <16 x i8>, ptr %q, align 1 %z2 = zext <16 x i8> %v2 to <16 x i32> %sub = sub nsw <16 x i32> %z1, %z2 %isneg = icmp sgt <16 x i32> %sub, @@ -795,7 +793,7 @@ define dso_local i32 @sad_nonloop_16i8(<16 x i8>* nocapture readonly %p, i64, <1 ret i32 %sum } -define dso_local i32 @sad_nonloop_32i8(<32 x i8>* nocapture readonly %p, i64, <32 x i8>* nocapture readonly %q) local_unnamed_addr #0 { +define dso_local i32 @sad_nonloop_32i8(ptr nocapture readonly %p, i64, ptr nocapture readonly %q) local_unnamed_addr #0 { ; SSE2-LABEL: sad_nonloop_32i8: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqu (%rdx), %xmm0 @@ -845,9 +843,9 @@ define dso_local i32 @sad_nonloop_32i8(<32 x i8>* nocapture readonly %p, i64, <3 ; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %v1 = load <32 x i8>, <32 x i8>* %p, align 1 + %v1 = load <32 x i8>, ptr %p, align 1 %z1 = zext <32 x i8> %v1 to <32 x i32> - %v2 = load <32 x i8>, <32 x i8>* %q, align 1 + %v2 = load <32 x i8>, ptr %q, align 1 %z2 = zext <32 x i8> %v2 to <32 x i32> %sub = sub nsw <32 x i32> %z1, %z2 %isneg = icmp sgt <32 x i32> %sub, @@ -867,7 +865,7 @@ define dso_local i32 @sad_nonloop_32i8(<32 x i8>* nocapture readonly %p, i64, <3 ret i32 %sum } -define dso_local i32 @sad_nonloop_64i8(<64 x i8>* nocapture readonly %p, i64, <64 x i8>* nocapture readonly %q) local_unnamed_addr #0 { +define dso_local i32 @sad_nonloop_64i8(ptr nocapture readonly %p, i64, ptr nocapture readonly %q) local_unnamed_addr #0 { ; SSE2-LABEL: sad_nonloop_64i8: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqu (%rdx), %xmm0 @@ -952,9 +950,9 @@ define dso_local i32 @sad_nonloop_64i8(<64 x i8>* nocapture readonly %p, i64, <6 ; AVX512BW-NEXT: vmovd %xmm0, %eax ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq - %v1 = load <64 x i8>, <64 x i8>* %p, align 1 + %v1 = load <64 x i8>, ptr %p, align 1 %z1 = zext <64 x i8> %v1 to <64 x i32> - %v2 = load <64 x i8>, <64 x i8>* %q, align 1 + %v2 = load <64 x i8>, ptr %q, align 1 %z2 = zext <64 x i8> %v2 to <64 x i32> %sub = sub nsw <64 x i32> %z1, %z2 %isneg = icmp sgt <64 x i32> %sub, @@ -978,7 +976,7 @@ define dso_local i32 @sad_nonloop_64i8(<64 x i8>* nocapture readonly %p, i64, <6 ; This contains an 
unrolled sad loop with a non-zero initial value. ; DAGCombiner reassociation previously rewrote the adds to move the constant vector further down the tree. This resulted in the vector-reduction flag being lost. -define dso_local i32 @sad_unroll_nonzero_initial(<16 x i8>* %arg, <16 x i8>* %arg1, <16 x i8>* %arg2, <16 x i8>* %arg3) { +define dso_local i32 @sad_unroll_nonzero_initial(ptr %arg, ptr %arg1, ptr %arg2, ptr %arg3) { ; SSE2-LABEL: sad_unroll_nonzero_initial: ; SSE2: # %bb.0: # %bb ; SSE2-NEXT: movdqu (%rdi), %xmm0 @@ -1011,8 +1009,8 @@ define dso_local i32 @sad_unroll_nonzero_initial(<16 x i8>* %arg, <16 x i8>* %ar ; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: retq bb: - %tmp = load <16 x i8>, <16 x i8>* %arg, align 1 - %tmp4 = load <16 x i8>, <16 x i8>* %arg1, align 1 + %tmp = load <16 x i8>, ptr %arg, align 1 + %tmp4 = load <16 x i8>, ptr %arg1, align 1 %tmp5 = zext <16 x i8> %tmp to <16 x i32> %tmp6 = zext <16 x i8> %tmp4 to <16 x i32> %tmp7 = sub nsw <16 x i32> %tmp5, %tmp6 @@ -1020,8 +1018,8 @@ bb: %tmp9 = sub nsw <16 x i32> zeroinitializer, %tmp7 %tmp10 = select <16 x i1> %tmp8, <16 x i32> %tmp9, <16 x i32> %tmp7 %tmp11 = add nuw nsw <16 x i32> %tmp10, - %tmp12 = load <16 x i8>, <16 x i8>* %arg2, align 1 - %tmp13 = load <16 x i8>, <16 x i8>* %arg3, align 1 + %tmp12 = load <16 x i8>, ptr %arg2, align 1 + %tmp13 = load <16 x i8>, ptr %arg3, align 1 %tmp14 = zext <16 x i8> %tmp12 to <16 x i32> %tmp15 = zext <16 x i8> %tmp13 to <16 x i32> %tmp16 = sub nsw <16 x i32> %tmp14, %tmp15 @@ -1043,7 +1041,7 @@ bb: ; This test contains two absolute difference patterns joined by an add. The result of that add is then reduced to a single element. ; SelectionDAGBuilder should tag the joining add as a vector reduction. We neeed to recognize that both sides can use psadbw. -define dso_local i32 @sad_double_reduction(<16 x i8>* %arg, <16 x i8>* %arg1, <16 x i8>* %arg2, <16 x i8>* %arg3) { +define dso_local i32 @sad_double_reduction(ptr %arg, ptr %arg1, ptr %arg2, ptr %arg3) { ; SSE2-LABEL: sad_double_reduction: ; SSE2: # %bb.0: # %bb ; SSE2-NEXT: movdqu (%rdi), %xmm0 @@ -1074,16 +1072,16 @@ define dso_local i32 @sad_double_reduction(<16 x i8>* %arg, <16 x i8>* %arg1, <1 ; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: retq bb: - %tmp = load <16 x i8>, <16 x i8>* %arg, align 1 - %tmp4 = load <16 x i8>, <16 x i8>* %arg1, align 1 + %tmp = load <16 x i8>, ptr %arg, align 1 + %tmp4 = load <16 x i8>, ptr %arg1, align 1 %tmp5 = zext <16 x i8> %tmp to <16 x i32> %tmp6 = zext <16 x i8> %tmp4 to <16 x i32> %tmp7 = sub nsw <16 x i32> %tmp5, %tmp6 %tmp8 = icmp slt <16 x i32> %tmp7, zeroinitializer %tmp9 = sub nsw <16 x i32> zeroinitializer, %tmp7 %tmp10 = select <16 x i1> %tmp8, <16 x i32> %tmp9, <16 x i32> %tmp7 - %tmp11 = load <16 x i8>, <16 x i8>* %arg2, align 1 - %tmp12 = load <16 x i8>, <16 x i8>* %arg3, align 1 + %tmp11 = load <16 x i8>, ptr %arg2, align 1 + %tmp12 = load <16 x i8>, ptr %arg3, align 1 %tmp13 = zext <16 x i8> %tmp11 to <16 x i32> %tmp14 = zext <16 x i8> %tmp12 to <16 x i32> %tmp15 = sub nsw <16 x i32> %tmp13, %tmp14 @@ -1105,7 +1103,7 @@ bb: ; This test contains two absolute difference patterns joined by an add. The result of that add is then reduced to a single element. ; SelectionDAGBuilder should tag the joining add as a vector reduction. We neeed to recognize that both sides can use psadbw. 
-define dso_local i32 @sad_double_reduction_abs(<16 x i8>* %arg, <16 x i8>* %arg1, <16 x i8>* %arg2, <16 x i8>* %arg3) { +define dso_local i32 @sad_double_reduction_abs(ptr %arg, ptr %arg1, ptr %arg2, ptr %arg3) { ; SSE2-LABEL: sad_double_reduction_abs: ; SSE2: # %bb.0: # %bb ; SSE2-NEXT: movdqu (%rdi), %xmm0 @@ -1136,14 +1134,14 @@ define dso_local i32 @sad_double_reduction_abs(<16 x i8>* %arg, <16 x i8>* %arg1 ; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: retq bb: - %tmp = load <16 x i8>, <16 x i8>* %arg, align 1 - %tmp4 = load <16 x i8>, <16 x i8>* %arg1, align 1 + %tmp = load <16 x i8>, ptr %arg, align 1 + %tmp4 = load <16 x i8>, ptr %arg1, align 1 %tmp5 = zext <16 x i8> %tmp to <16 x i32> %tmp6 = zext <16 x i8> %tmp4 to <16 x i32> %tmp7 = sub nsw <16 x i32> %tmp5, %tmp6 %tmp10 = call <16 x i32> @llvm.abs.v16i32(<16 x i32> %tmp7, i1 false) - %tmp11 = load <16 x i8>, <16 x i8>* %arg2, align 1 - %tmp12 = load <16 x i8>, <16 x i8>* %arg3, align 1 + %tmp11 = load <16 x i8>, ptr %arg2, align 1 + %tmp12 = load <16 x i8>, ptr %arg3, align 1 %tmp13 = zext <16 x i8> %tmp11 to <16 x i32> %tmp14 = zext <16 x i8> %tmp12 to <16 x i32> %tmp15 = sub nsw <16 x i32> %tmp13, %tmp14 diff --git a/llvm/test/ExecutionEngine/Orc/global-variable-alignment.ll b/llvm/test/ExecutionEngine/Orc/global-variable-alignment.ll index 16fc3ad0547dc..3a4528a025ddf 100644 --- a/llvm/test/ExecutionEngine/Orc/global-variable-alignment.ll +++ b/llvm/test/ExecutionEngine/Orc/global-variable-alignment.ll @@ -7,22 +7,20 @@ ; A failure may indicate a problem with alignment handling in the JIT linker or ; JIT memory manager. ; -; ifndef INTEL_SYCL_OPAQUEPOINTER_READY ; RUN: %lli %s -; endif @A = internal global i8 1, align 1 @B = global i64 1, align 8 @C = internal global i8 1, align 1 -define i32 @main(i32 %argc, i8** %argv) { +define i32 @main(i32 %argc, ptr %argv) { entry: - %0 = ptrtoint i8* @B to i32 + %0 = ptrtoint ptr @B to i32 %1 = and i32 %0, 7 - %2 = load i8, i8* @A + %2 = load i8, ptr @A %3 = zext i8 %2 to i32 %4 = add i32 %1, %3 - %5 = load i8, i8* @C + %5 = load i8, ptr @C %6 = zext i8 %5 to i32 %7 = sub i32 %4, %6 ret i32 %7 diff --git a/llvm/test/Instrumentation/InstrProfiling/timestamp-coverage.ll b/llvm/test/Instrumentation/InstrProfiling/timestamp-coverage.ll index bbffa2a8c8ea6..854c3bc4aeea9 100644 --- a/llvm/test/Instrumentation/InstrProfiling/timestamp-coverage.ll +++ b/llvm/test/Instrumentation/InstrProfiling/timestamp-coverage.ll @@ -1,8 +1,4 @@ -; ifdef INTEL_SYCL_OPAQUEPOINTER_READY -; COM: opt < %s -passes=instrprof -S | FileCheck %s -; else ; RUN: opt < %s -passes=instrprof -S | FileCheck %s -; endif target triple = "aarch64-unknown-linux-gnu" @@ -10,11 +6,11 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK: @__profc_foo = private global [9 x i8] c"\FF\FF\FF\FF\FF\FF\FF\FF\FF", section "__llvm_prf_cnts", comdat, align 8 define void @_Z3foov() { - call void @llvm.instrprof.timestamp(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 12345678, i32 9, i32 0) + call void @llvm.instrprof.timestamp(ptr getelementptr inbounds ([3 x i8], ptr @__profn_foo, i32 0, i32 0), i64 12345678, i32 9, i32 0) ; CHECK: call void @__llvm_profile_set_timestamp(ptr @__profc_foo) - call void @llvm.instrprof.cover(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 12345678, i32 9, i32 8) + call void @llvm.instrprof.cover(ptr getelementptr inbounds ([3 x i8], ptr @__profn_foo, i32 0, i32 0), i64 12345678, i32 9, i32 8) ret void } -declare void 
@llvm.instrprof.timestamp(i8*, i64, i32, i32) -declare void @llvm.instrprof.cover(i8*, i64, i32, i32) +declare void @llvm.instrprof.timestamp(ptr, i64, i32, i32) +declare void @llvm.instrprof.cover(ptr, i64, i32, i32) diff --git a/llvm/test/Instrumentation/InstrProfiling/timestamp.ll b/llvm/test/Instrumentation/InstrProfiling/timestamp.ll index 47f0a1ab34a2e..be4c9236e3785 100644 --- a/llvm/test/Instrumentation/InstrProfiling/timestamp.ll +++ b/llvm/test/Instrumentation/InstrProfiling/timestamp.ll @@ -1,8 +1,4 @@ -; ifdef INTEL_SYCL_OPAQUEPOINTER_READY -; COM: opt < %s -passes=instrprof -S | FileCheck %s -; else ; RUN: opt < %s -passes=instrprof -S | FileCheck %s -; endif target triple = "aarch64-unknown-linux-gnu" @@ -10,11 +6,11 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK: @__profc_foo = private global [2 x i64] zeroinitializer, section "__llvm_prf_cnts", comdat, align 8 define void @_Z3foov() { - call void @llvm.instrprof.timestamp(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 12345678, i32 2, i32 0) + call void @llvm.instrprof.timestamp(ptr getelementptr inbounds ([3 x i8], ptr @__profn_foo, i32 0, i32 0), i64 12345678, i32 2, i32 0) ; CHECK: call void @__llvm_profile_set_timestamp(ptr @__profc_foo) - call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 12345678, i32 2, i32 1) + call void @llvm.instrprof.increment(ptr getelementptr inbounds ([3 x i8], ptr @__profn_foo, i32 0, i32 0), i64 12345678, i32 2, i32 1) ret void } -declare void @llvm.instrprof.timestamp(i8*, i64, i32, i32) -declare void @llvm.instrprof.increment(i8*, i64, i32, i32) +declare void @llvm.instrprof.timestamp(ptr, i64, i32, i32) +declare void @llvm.instrprof.increment(ptr, i64, i32, i32) diff --git a/llvm/test/Transforms/GVNHoist/infinite-loop-indirect.ll b/llvm/test/Transforms/GVNHoist/infinite-loop-indirect.ll index 12e45dceca90f..61fa0e07f3480 100644 --- a/llvm/test/Transforms/GVNHoist/infinite-loop-indirect.ll +++ b/llvm/test/Transforms/GVNHoist/infinite-loop-indirect.ll @@ -1,20 +1,16 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 -; ifdef INTEL_SYCL_OPAQUEPOINTER_READY -; COM: opt -S -passes=gvn-hoist < %s | FileCheck %s -; else ; RUN: opt -S -passes=gvn-hoist < %s | FileCheck %s -; endif ; Checking gvn-hoist in case of indirect branches. 
-%class.bar = type { i8*, %class.base* } +%class.bar = type { ptr, ptr } %class.base = type { i32 (...)** } @bar = local_unnamed_addr global i32 ()* null, align 8 @bar1 = local_unnamed_addr global i32 ()* null, align 8 ; Check that the bitcast is not hoisted because it is after an indirect call -define i32 @foo(i32* nocapture readonly %i) { +define i32 @foo(ptr nocapture readonly %i) { ; CHECK-LABEL: define i32 @foo ; CHECK-SAME: (ptr nocapture readonly [[I:%.*]]) { ; CHECK-NEXT: entry: @@ -40,21 +36,21 @@ define i32 @foo(i32* nocapture readonly %i) { ; entry: %agg.tmp = alloca %class.bar, align 8 - %x= getelementptr inbounds %class.bar, %class.bar* %agg.tmp, i64 0, i32 1 - %y = load %class.base*, %class.base** %x, align 8 - %0 = load i32, i32* %i, align 4 + %x= getelementptr inbounds %class.bar, ptr %agg.tmp, i64 0, i32 1 + %y = load ptr, ptr %x, align 8 + %0 = load i32, ptr %i, align 4 %.off = add i32 %0, -1 %switch = icmp ult i32 %.off, 2 br i1 %switch, label %l1.preheader, label %sw.default l1.preheader: ; preds = %sw.default, %entry - %b1 = bitcast %class.base* %y to void (%class.base*)*** + %b1 = bitcast ptr %y to void (ptr)*** br label %l1 l1: ; preds = %l1.preheader, %l1 %1 = load i32 ()*, i32 ()** @bar, align 8 %call = tail call i32 %1() - %b2 = bitcast %class.base* %y to void (%class.base*)*** + %b2 = bitcast ptr %y to void (ptr)*** br label %l1 sw.default: ; preds = %entry @@ -66,7 +62,7 @@ sw.default: ; preds = %entry ; Any instruction inside an infinite loop will not be hoisted because ; there is no path to exit of the function. -define i32 @foo1(i32* nocapture readonly %i) { +define i32 @foo1(ptr nocapture readonly %i) { ; CHECK-LABEL: define i32 @foo1 ; CHECK-SAME: (ptr nocapture readonly [[I:%.*]]) { ; CHECK-NEXT: entry: @@ -94,22 +90,22 @@ define i32 @foo1(i32* nocapture readonly %i) { ; entry: %agg.tmp = alloca %class.bar, align 8 - %x= getelementptr inbounds %class.bar, %class.bar* %agg.tmp, i64 0, i32 1 - %y = load %class.base*, %class.base** %x, align 8 - %0 = load i32, i32* %i, align 4 + %x= getelementptr inbounds %class.bar, ptr %agg.tmp, i64 0, i32 1 + %y = load ptr, ptr %x, align 8 + %0 = load i32, ptr %i, align 4 %.off = add i32 %0, -1 %switch = icmp ult i32 %.off, 2 br i1 %switch, label %l1.preheader, label %sw.default l1.preheader: ; preds = %sw.default, %entry - %b1 = bitcast %class.base* %y to void (%class.base*)*** - %y1 = load %class.base*, %class.base** %x, align 8 + %b1 = bitcast ptr %y to void (ptr)*** + %y1 = load ptr, ptr %x, align 8 br label %l1 l1: ; preds = %l1.preheader, %l1 - %b2 = bitcast %class.base* %y to void (%class.base*)*** + %b2 = bitcast ptr %y to void (ptr)*** %1 = load i32 ()*, i32 ()** @bar, align 8 - %y2 = load %class.base*, %class.base** %x, align 8 + %y2 = load ptr, ptr %x, align 8 %call = tail call i32 %1() br label %l1 @@ -120,7 +116,7 @@ sw.default: ; preds = %entry } ; Check that bitcast is hoisted even when one of them is partially redundant. 
-define i32 @test13(i32* %P, i8* %Ptr, i32* nocapture readonly %i) { +define i32 @test13(ptr %P, ptr %Ptr, ptr nocapture readonly %i) { ; CHECK-LABEL: define i32 @test13 ; CHECK-SAME: (ptr [[P:%.*]], ptr [[PTR:%.*]], ptr nocapture readonly [[I:%.*]]) { ; CHECK-NEXT: entry: @@ -143,18 +139,18 @@ define i32 @test13(i32* %P, i8* %Ptr, i32* nocapture readonly %i) { ; entry: %agg.tmp = alloca %class.bar, align 8 - %x= getelementptr inbounds %class.bar, %class.bar* %agg.tmp, i64 0, i32 1 - %y = load %class.base*, %class.base** %x, align 8 - indirectbr i8* %Ptr, [label %BrBlock, label %B2] + %x= getelementptr inbounds %class.bar, ptr %agg.tmp, i64 0, i32 1 + %y = load ptr, ptr %x, align 8 + indirectbr ptr %Ptr, [label %BrBlock, label %B2] B2: - %b1 = bitcast %class.base* %y to void (%class.base*)*** - store i32 4, i32 *%P + %b1 = bitcast ptr %y to void (ptr)*** + store i32 4, ptr%P br label %BrBlock BrBlock: - %b2 = bitcast %class.base* %y to void (%class.base*)*** - %L = load i32, i32* %P + %b2 = bitcast ptr %y to void (ptr)*** + %L = load i32, ptr %P %C = icmp eq i32 %L, 42 br i1 %C, label %T, label %F @@ -167,7 +163,7 @@ F: ; Check that the bitcast is not hoisted because anticipability ; cannot be guaranteed here as one of the indirect branch targets ; do not have the bitcast instruction. -define i32 @test14(i32* %P, i8* %Ptr, i32* nocapture readonly %i) { +define i32 @test14(ptr %P, ptr %Ptr, ptr nocapture readonly %i) { ; CHECK-LABEL: define i32 @test14 ; CHECK-SAME: (ptr [[P:%.*]], ptr [[PTR:%.*]], ptr nocapture readonly [[I:%.*]]) { ; CHECK-NEXT: entry: @@ -193,33 +189,33 @@ define i32 @test14(i32* %P, i8* %Ptr, i32* nocapture readonly %i) { ; entry: %agg.tmp = alloca %class.bar, align 8 - %x= getelementptr inbounds %class.bar, %class.bar* %agg.tmp, i64 0, i32 1 - %y = load %class.base*, %class.base** %x, align 8 - indirectbr i8* %Ptr, [label %BrBlock, label %B2, label %T] + %x= getelementptr inbounds %class.bar, ptr %agg.tmp, i64 0, i32 1 + %y = load ptr, ptr %x, align 8 + indirectbr ptr %Ptr, [label %BrBlock, label %B2, label %T] B2: - %b1 = bitcast %class.base* %y to void (%class.base*)*** - store i32 4, i32 *%P + %b1 = bitcast ptr %y to void (ptr)*** + store i32 4, ptr%P br label %BrBlock BrBlock: - %b2 = bitcast %class.base* %y to void (%class.base*)*** - %L = load i32, i32* %P + %b2 = bitcast ptr %y to void (ptr)*** + %L = load i32, ptr %P %C = icmp eq i32 %L, 42 br i1 %C, label %T, label %F T: - %pi = load i32, i32* %i, align 4 + %pi = load i32, ptr %i, align 4 ret i32 %pi F: - %pl = load i32, i32* %P + %pl = load i32, ptr %P ret i32 %pl } ; Check that the bitcast is not hoisted because of a cycle ; due to indirect branches -define i32 @test16(i32* %P, i8* %Ptr, i32* nocapture readonly %i) { +define i32 @test16(ptr %P, ptr %Ptr, ptr nocapture readonly %i) { ; CHECK-LABEL: define i32 @test16 ; CHECK-SAME: (ptr [[P:%.*]], ptr [[PTR:%.*]], ptr nocapture readonly [[I:%.*]]) { ; CHECK-NEXT: entry: @@ -244,31 +240,31 @@ define i32 @test16(i32* %P, i8* %Ptr, i32* nocapture readonly %i) { ; entry: %agg.tmp = alloca %class.bar, align 8 - %x= getelementptr inbounds %class.bar, %class.bar* %agg.tmp, i64 0, i32 1 - %y = load %class.base*, %class.base** %x, align 8 - indirectbr i8* %Ptr, [label %BrBlock, label %B2] + %x= getelementptr inbounds %class.bar, ptr %agg.tmp, i64 0, i32 1 + %y = load ptr, ptr %x, align 8 + indirectbr ptr %Ptr, [label %BrBlock, label %B2] B2: - %b1 = bitcast %class.base* %y to void (%class.base*)*** - %0 = load i32, i32* %i, align 4 - store i32 %0, i32 *%P + %b1 = 
bitcast ptr %y to void (ptr)*** + %0 = load i32, ptr %i, align 4 + store i32 %0, ptr%P br label %BrBlock BrBlock: - %b2 = bitcast %class.base* %y to void (%class.base*)*** - %L = load i32, i32* %P + %b2 = bitcast ptr %y to void (ptr)*** + %L = load i32, ptr %P %C = icmp eq i32 %L, 42 br i1 %C, label %T, label %F T: - indirectbr i32* %P, [label %BrBlock, label %B2] + indirectbr ptr %P, [label %BrBlock, label %B2] F: - indirectbr i8* %Ptr, [label %BrBlock, label %B2] + indirectbr ptr %Ptr, [label %BrBlock, label %B2] } -@_ZTIi = external constant i8* +@_ZTIi = external constant ptr ; Check that an instruction is not hoisted out of landing pad (%lpad4) ; Also within a landing pad no redundancies are removed by gvn-hoist, @@ -276,7 +272,7 @@ F: ; landing pad has direct branches (e.g., %lpad to %catch1, %catch) ; This CFG has a cycle (%lpad -> %catch1 -> %lpad4 -> %lpad) -define i32 @foo2(i32* nocapture readonly %i) local_unnamed_addr personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +define i32 @foo2(ptr nocapture readonly %i) local_unnamed_addr personality ptr bitcast (i32 (...)* @__gxx_personality_v0 to ptr) { ; CHECK-LABEL: define i32 @foo2 ; CHECK-SAME: (ptr nocapture readonly [[I:%.*]]) local_unnamed_addr personality ptr @__gxx_personality_v0 { ; CHECK-NEXT: entry: @@ -326,28 +322,28 @@ define i32 @foo2(i32* nocapture readonly %i) local_unnamed_addr personality i8* ; CHECK-NEXT: ret i32 [[BC2]] ; entry: - %0 = load i32, i32* %i, align 4 + %0 = load i32, ptr %i, align 4 %cmp = icmp eq i32 %0, 0 br i1 %cmp, label %try.cont, label %if.then if.then: - %exception = tail call i8* @__cxa_allocate_exception(i64 4) #2 - %1 = bitcast i8* %exception to i32* - store i32 %0, i32* %1, align 16 - invoke void @__cxa_throw(i8* %exception, i8* bitcast (i8** @_ZTIi to i8*), i8* null) #3 + %exception = tail call ptr @__cxa_allocate_exception(i64 4) #2 + %1 = bitcast ptr %exception to ptr + store i32 %0, ptr %1, align 16 + invoke void @__cxa_throw(ptr %exception, ptr bitcast (ptr @_ZTIi to ptr), ptr null) #3 to label %unreachable unwind label %lpad lpad: - %2 = landingpad { i8*, i32 } - catch i8* bitcast (i8** @_ZTIi to i8*) - catch i8* null + %2 = landingpad { ptr, i32 } + catch ptr bitcast (ptr @_ZTIi to ptr) + catch ptr null %bc1 = add i32 %0, 10 - %3 = extractvalue { i8*, i32 } %2, 0 - %4 = extractvalue { i8*, i32 } %2, 1 - %5 = tail call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*)) #2 + %3 = extractvalue { ptr, i32 } %2, 0 + %4 = extractvalue { ptr, i32 } %2, 1 + %5 = tail call i32 @llvm.eh.typeid.for(ptr bitcast (ptr @_ZTIi to ptr)) #2 %matches = icmp eq i32 %4, %5 %bc7 = add i32 %0, 10 - %6 = tail call i8* @__cxa_begin_catch(i8* %3) #2 + %6 = tail call ptr @__cxa_begin_catch(ptr %3) #2 br i1 %matches, label %catch1, label %catch catch1: @@ -357,17 +353,17 @@ catch1: catch: %bc4 = add i32 %0, 10 - %7 = load i32, i32* %i, align 4 + %7 = load i32, ptr %i, align 4 %add = add nsw i32 %7, 1 tail call void @__cxa_end_catch() br label %try.cont lpad4: - %8 = landingpad { i8*, i32 } + %8 = landingpad { ptr, i32 } cleanup %bc5 = add i32 %0, 10 tail call void @__cxa_end_catch() #2 - invoke void @__cxa_throw(i8* %exception, i8* bitcast (i8** @_ZTIi to i8*), i8* null) #3 + invoke void @__cxa_throw(ptr %exception, ptr bitcast (ptr @_ZTIi to ptr), ptr null) #3 to label %unreachable unwind label %lpad try.cont: @@ -380,16 +376,16 @@ unreachable: ret i32 %bc2 } -declare i8* @__cxa_allocate_exception(i64) local_unnamed_addr +declare ptr @__cxa_allocate_exception(i64) local_unnamed_addr 
-declare void @__cxa_throw(i8*, i8*, i8*) local_unnamed_addr +declare void @__cxa_throw(ptr, ptr, ptr) local_unnamed_addr declare i32 @__gxx_personality_v0(...) ; Function Attrs: nounwind readnone -declare i32 @llvm.eh.typeid.for(i8*) #1 +declare i32 @llvm.eh.typeid.for(ptr) #1 -declare i8* @__cxa_begin_catch(i8*) local_unnamed_addr +declare ptr @__cxa_begin_catch(ptr) local_unnamed_addr declare void @__cxa_end_catch() local_unnamed_addr diff --git a/llvm/test/Transforms/GlobalOpt/issue62384.ll b/llvm/test/Transforms/GlobalOpt/issue62384.ll index 65dbfa6a98665..cc2bc8940b891 100644 --- a/llvm/test/Transforms/GlobalOpt/issue62384.ll +++ b/llvm/test/Transforms/GlobalOpt/issue62384.ll @@ -1,9 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 -; if INTEL_SYCL_OPAQUEPOINTER_READY ; RUN: opt -S -p=globalopt %s | FileCheck %s -; else -// XFAIL: * -; end @llvm.used = appending global [1 x ptr] [ptr @ctor] diff --git a/llvm/test/Transforms/InstCombine/scalable-vector-struct.ll b/llvm/test/Transforms/InstCombine/scalable-vector-struct.ll index f359e89578555..c9966be72fb51 100644 --- a/llvm/test/Transforms/InstCombine/scalable-vector-struct.ll +++ b/llvm/test/Transforms/InstCombine/scalable-vector-struct.ll @@ -1,23 +1,21 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 -; ifndef INTEL_SYCL_OPAQUEPOINTER_READY ; RUN: opt -passes=instcombine -S < %s 2>&1 | FileCheck %s -; endif %struct.test = type { , } -define @load(%struct.test* %x) { +define @load(ptr %x) { ; CHECK-LABEL: define @load ; CHECK-SAME: (ptr [[X:%.*]]) { ; CHECK-NEXT: [[A:%.*]] = load [[STRUCT_TEST:%.*]], ptr [[X]], align 4 ; CHECK-NEXT: [[B:%.*]] = extractvalue [[STRUCT_TEST]] [[A]], 1 ; CHECK-NEXT: ret [[B]] ; - %a = load %struct.test, %struct.test* %x + %a = load %struct.test, ptr %x %b = extractvalue %struct.test %a, 1 ret %b } -define void @store(%struct.test* %x, %y, %z) { +define void @store(ptr %x, %y, %z) { ; CHECK-LABEL: define void @store ; CHECK-SAME: (ptr [[X:%.*]], [[Y:%.*]], [[Z:%.*]]) { ; CHECK-NEXT: [[A:%.*]] = insertvalue [[STRUCT_TEST:%.*]] undef, [[Y]], 0 @@ -27,6 +25,6 @@ define void @store(%struct.test* %x, %y, % ; %a = insertvalue %struct.test undef, %y, 0 %b = insertvalue %struct.test %a, %z, 1 - store %struct.test %b, %struct.test* %x + store %struct.test %b, ptr %x ret void } diff --git a/llvm/test/Transforms/MoveAutoInit/clobber.ll b/llvm/test/Transforms/MoveAutoInit/clobber.ll index f1f7f63bf8078..09084b6ddc51b 100644 --- a/llvm/test/Transforms/MoveAutoInit/clobber.ll +++ b/llvm/test/Transforms/MoveAutoInit/clobber.ll @@ -1,11 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; Checks that move-auto-init can move instruction passed unclobbering memory ; instructions. 
-; ifdef INTEL_SYCL_OPAQUEPOINTER_READY -; COM: opt < %s -S -passes='move-auto-init' -verify-memoryssa | FileCheck %s -; else ; RUN: opt < %s -S -passes='move-auto-init' -verify-memoryssa | FileCheck %s -; endif target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" @@ -49,25 +45,25 @@ define i32 @foo(i32 noundef %0, i32 noundef %1, i32 noundef %2) #0 { %4 = alloca [100 x i8], align 16 %5 = alloca [2 x i8], align 1 - %6 = getelementptr inbounds [100 x i8], [100 x i8]* %4, i64 0, i64 0 - call void @llvm.lifetime.start.p0i8(i64 100, i8* nonnull %6) #3 + %6 = getelementptr inbounds [100 x i8], ptr %4, i64 0, i64 0 + call void @llvm.lifetime.start.p0(i64 100, ptr nonnull %6) #3 ; This memset must move. - call void @llvm.memset.p0i8.i64(i8* noundef nonnull align 16 dereferenceable(100) %6, i8 -86, i64 100, i1 false), !annotation !0 - %7 = getelementptr inbounds [2 x i8], [2 x i8]* %5, i64 0, i64 0 - call void @llvm.lifetime.start.p0i8(i64 2, i8* nonnull %7) #3 + call void @llvm.memset.p0.i64(ptr noundef nonnull align 16 dereferenceable(100) %6, i8 -86, i64 100, i1 false), !annotation !0 + %7 = getelementptr inbounds [2 x i8], ptr %5, i64 0, i64 0 + call void @llvm.lifetime.start.p0(i64 2, ptr nonnull %7) #3 ; This store must move. - store i8 -86, i8* %7, align 1, !annotation !0 - %8 = getelementptr inbounds [2 x i8], [2 x i8]* %5, i64 0, i64 1 + store i8 -86, ptr %7, align 1, !annotation !0 + %8 = getelementptr inbounds [2 x i8], ptr %5, i64 0, i64 1 ; This store must move. - store i8 -86, i8* %8, align 1, !annotation !0 + store i8 -86, ptr %8, align 1, !annotation !0 %9 = icmp eq i32 %1, 0 br i1 %9, label %15, label %10 10: %11 = sext i32 %0 to i64 - %12 = getelementptr inbounds [100 x i8], [100 x i8]* %4, i64 0, i64 %11 - store i8 12, i8* %12, align 1 - %13 = load i8, i8* %6, align 16 + %12 = getelementptr inbounds [100 x i8], ptr %4, i64 0, i64 %11 + store i8 12, ptr %12, align 1 + %13 = load i8, ptr %6, align 16 %14 = sext i8 %13 to i32 br label %22 @@ -77,24 +73,24 @@ define i32 @foo(i32 noundef %0, i32 noundef %1, i32 noundef %2) #0 { 17: %18 = sext i32 %0 to i64 - %19 = getelementptr inbounds [2 x i8], [2 x i8]* %5, i64 0, i64 %18 - store i8 12, i8* %19, align 1 - %20 = load i8, i8* %7, align 1 + %19 = getelementptr inbounds [2 x i8], ptr %5, i64 0, i64 %18 + store i8 12, ptr %19, align 1 + %20 = load i8, ptr %7, align 1 %21 = sext i8 %20 to i32 br label %22 22: %23 = phi i32 [ %14, %10 ], [ %21, %17 ], [ 0, %15 ] - call void @llvm.lifetime.end.p0i8(i64 2, i8* nonnull %7) #3 - call void @llvm.lifetime.end.p0i8(i64 100, i8* nonnull %6) #3 + call void @llvm.lifetime.end.p0(i64 2, ptr nonnull %7) #3 + call void @llvm.lifetime.end.p0(i64 100, ptr nonnull %6) #3 ret i32 %23 } -declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1 +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1 -declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) #2 +declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #2 -declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1 +declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1 attributes #0 = { mustprogress nofree nosync nounwind readnone uwtable willreturn } attributes #1 = { argmemonly mustprogress nofree nosync nounwind willreturn } diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86-basic.ll b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86-basic.ll index 
67d0ca602da40..314546afb8853 100644 --- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86-basic.ll +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86-basic.ll @@ -1,5 +1,4 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; ifndef INTEL_SYCL_OPAQUEPOINTER_READY ;; Check that we accept functions with '$' in the name. ; RUN: llc -mtriple=x86_64 < %s | FileCheck %s @@ -8,7 +7,6 @@ ;; Check that we accept .seh_proc below the function label. ; RUN: llc -mtriple=x86_64-windows -relocation-model=pic < %s | FileCheck %s --check-prefix=WIN -; endif @gv0 = dso_local global i32 0, align 4 @gv1 = dso_preemptable global i32 0, align 4 @@ -28,8 +26,8 @@ declare void @ext() define i32 @load() { entry: - %a = load i32, i32* @gv0 - %b = load i32, i32* @gv1 + %a = load i32, ptr @gv0 + %b = load i32, ptr @gv1 %c = add i32 %a, %b ret i32 %c } diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86-basic.ll.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86-basic.ll.expected index ab1e3e6027952..f46d3071b67f1 100644 --- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86-basic.ll.expected +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86-basic.ll.expected @@ -1,5 +1,4 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; ifndef INTEL_SYCL_OPAQUEPOINTER_READY ;; Check that we accept functions with '$' in the name. ; RUN: llc -mtriple=x86_64 < %s | FileCheck %s @@ -8,7 +7,6 @@ ;; Check that we accept .seh_proc below the function label. ; RUN: llc -mtriple=x86_64-windows -relocation-model=pic < %s | FileCheck %s --check-prefix=WIN -; endif @gv0 = dso_local global i32 0, align 4 @gv1 = dso_preemptable global i32 0, align 4 @@ -91,8 +89,8 @@ define i32 @load() { ; WIN-NEXT: addl gv1(%rip), %eax ; WIN-NEXT: retq entry: - %a = load i32, i32* @gv0 - %b = load i32, i32* @gv1 + %a = load i32, ptr @gv0 + %b = load i32, ptr @gv1 %c = add i32 %a, %b ret i32 %c }