diff --git a/llvm/test/Assembler/scalable-vector-struct.ll b/llvm/test/Assembler/scalable-vector-struct.ll
index d0b49b11b7fad..a16048335dbf8 100644
--- a/llvm/test/Assembler/scalable-vector-struct.ll
+++ b/llvm/test/Assembler/scalable-vector-struct.ll
@@ -1,23 +1,21 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
-; ifndef INTEL_SYCL_OPAQUEPOINTER_READY
 ; RUN: opt -passes=verify -S < %s 2>&1 | FileCheck %s
-; endif

 %struct.test = type { , }

-define @load(%struct.test* %x) {
+define @load(ptr %x) {
 ; CHECK-LABEL: define @load
 ; CHECK-SAME: (ptr [[X:%.*]]) {
 ; CHECK-NEXT:    [[A:%.*]] = load [[STRUCT_TEST:%.*]], ptr [[X]], align 4
 ; CHECK-NEXT:    [[B:%.*]] = extractvalue [[STRUCT_TEST]] [[A]], 1
 ; CHECK-NEXT:    ret [[B]]
 ;
-  %a = load %struct.test, %struct.test* %x
+  %a = load %struct.test, ptr %x
   %b = extractvalue %struct.test %a, 1
   ret %b
 }

-define void @store(%struct.test* %x, %y, %z) {
+define void @store(ptr %x, %y, %z) {
 ; CHECK-LABEL: define void @store
 ; CHECK-SAME: (ptr [[X:%.*]], [[Y:%.*]], [[Z:%.*]]) {
 ; CHECK-NEXT:    [[A:%.*]] = insertvalue [[STRUCT_TEST:%.*]] undef, [[Y]], 0
@@ -27,6 +25,6 @@ define void @store(%struct.test* %x, %y, %
 ;
   %a = insertvalue %struct.test undef, %y, 0
   %b = insertvalue %struct.test %a, %z, 1
-  store %struct.test %b, %struct.test* %x
+  store %struct.test %b, ptr %x
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/local-accessor-to-shared-memory-triple.ll b/llvm/test/CodeGen/AMDGPU/local-accessor-to-shared-memory-triple.ll
index 9b1d5305a7696..1e094bbc91097 100644
--- a/llvm/test/CodeGen/AMDGPU/local-accessor-to-shared-memory-triple.ll
+++ b/llvm/test/CodeGen/AMDGPU/local-accessor-to-shared-memory-triple.ll
@@ -1,9 +1,7 @@
 ; This test checks that the Local Accessor to Shared Memory pass runs with the
 ; `amdgcn-amd-amdhsa` triple, but not with `amdgcn-amd-amdpal`.
-; ifndef INTEL_SYCL_OPAQUEPOINTER_READY
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa < %s | FileCheck --check-prefix=CHECK-VALID %s
 ; RUN: llc -mtriple=amdgcn-amd-amdpal < %s | FileCheck --check-prefix=CHECK-INVALID %s
-; end

 ; ModuleID = 'local-accessor-to-shared-memory-triple.ll'
 source_filename = "local-accessor-to-shared-memory-triple.ll"
diff --git a/llvm/test/CodeGen/AMDGPU/local-accessor-to-shared-memory-valid-triple.ll b/llvm/test/CodeGen/AMDGPU/local-accessor-to-shared-memory-valid-triple.ll
index e47851320ff1c..3f00cd23d6475 100644
--- a/llvm/test/CodeGen/AMDGPU/local-accessor-to-shared-memory-valid-triple.ll
+++ b/llvm/test/CodeGen/AMDGPU/local-accessor-to-shared-memory-valid-triple.ll
@@ -1,9 +1,7 @@
 ; This test checks that the Local Accessor to Shared Memory pass runs with the
 ; `amdgcn-amd-amdhsa` triple and does not if the option is not present.
-; ifndef INTEL_SYCL_OPAQUEPOINTER_READY ; RUN: llc -mtriple=amdgcn-amd-amdhsa < %s | FileCheck --check-prefix=CHECK-OPT %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa < %s | FileCheck --check-prefix=CHECK-OPT %s -; end ; ModuleID = 'local-accessor-to-shared-memory-valid-triple.ll' source_filename = "local-accessor-to-shared-memory-valid-triple.ll" diff --git a/llvm/test/CodeGen/X86/avx512-broadcast-unfold.ll b/llvm/test/CodeGen/X86/avx512-broadcast-unfold.ll index d013858df0cbe..5fc89702456cf 100644 --- a/llvm/test/CodeGen/X86/avx512-broadcast-unfold.ll +++ b/llvm/test/CodeGen/X86/avx512-broadcast-unfold.ll @@ -1,12 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; ifndef INTEL_SYCL_OPAQUEPOINTER_READY ; RUN: llc < %s -mtriple=x86_64-pc-linux -mattr=avx512vl | FileCheck %s -; end ; Test that we can unfold constant pool loads when we're using avx512's ; ability to fold a broadcast load into an operation. -define void @bcast_unfold_add_v16i32(i32* %arg) { +define void @bcast_unfold_add_v16i32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_add_v16i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -26,12 +24,12 @@ bb: bb2: ; preds = %bb2, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb2 ] - %tmp3 = getelementptr inbounds i32, i32* %arg, i64 %tmp - %tmp4 = bitcast i32* %tmp3 to <16 x i32>* - %tmp5 = load <16 x i32>, <16 x i32>* %tmp4, align 4 + %tmp3 = getelementptr inbounds i32, ptr %arg, i64 %tmp + %tmp4 = bitcast ptr %tmp3 to ptr + %tmp5 = load <16 x i32>, ptr %tmp4, align 4 %tmp6 = add nsw <16 x i32> %tmp5, - %tmp7 = bitcast i32* %tmp3 to <16 x i32>* - store <16 x i32> %tmp6, <16 x i32>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp3 to ptr + store <16 x i32> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 16 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb2 @@ -40,7 +38,7 @@ bb10: ; preds = %bb2 ret void } -define void @bcast_unfold_add_v8i32(i32* %arg) { +define void @bcast_unfold_add_v8i32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_add_v8i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -60,12 +58,12 @@ bb: bb2: ; preds = %bb2, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb2 ] - %tmp3 = getelementptr inbounds i32, i32* %arg, i64 %tmp - %tmp4 = bitcast i32* %tmp3 to <8 x i32>* - %tmp5 = load <8 x i32>, <8 x i32>* %tmp4, align 4 + %tmp3 = getelementptr inbounds i32, ptr %arg, i64 %tmp + %tmp4 = bitcast ptr %tmp3 to ptr + %tmp5 = load <8 x i32>, ptr %tmp4, align 4 %tmp6 = add nsw <8 x i32> %tmp5, - %tmp7 = bitcast i32* %tmp3 to <8 x i32>* - store <8 x i32> %tmp6, <8 x i32>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp3 to ptr + store <8 x i32> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 8 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb2 @@ -74,7 +72,7 @@ bb10: ; preds = %bb2 ret void } -define void @bcast_unfold_add_v4i32(i32* %arg) { +define void @bcast_unfold_add_v4i32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_add_v4i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -93,12 +91,12 @@ bb: bb2: ; preds = %bb2, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb2 ] - %tmp3 = getelementptr inbounds i32, i32* %arg, i64 %tmp - %tmp4 = bitcast i32* %tmp3 to <4 x i32>* - %tmp5 = load <4 x i32>, <4 x i32>* %tmp4, align 4 + %tmp3 = getelementptr inbounds i32, ptr %arg, i64 %tmp + %tmp4 = bitcast ptr %tmp3 to ptr + %tmp5 = load <4 x i32>, ptr %tmp4, align 4 %tmp6 = add nsw <4 x i32> %tmp5, - %tmp7 = bitcast i32* %tmp3 to <4 x i32>* - store <4 x i32> %tmp6, <4 x i32>* %tmp7, 
align 4 + %tmp7 = bitcast ptr %tmp3 to ptr + store <4 x i32> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 4 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb2 @@ -107,7 +105,7 @@ bb10: ; preds = %bb2 ret void } -define void @bcast_unfold_add_v8i64(i64* %arg) { +define void @bcast_unfold_add_v8i64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_add_v8i64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -127,12 +125,12 @@ bb: bb2: ; preds = %bb2, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb2 ] - %tmp3 = getelementptr inbounds i64, i64* %arg, i64 %tmp - %tmp4 = bitcast i64* %tmp3 to <8 x i64>* - %tmp5 = load <8 x i64>, <8 x i64>* %tmp4, align 8 + %tmp3 = getelementptr inbounds i64, ptr %arg, i64 %tmp + %tmp4 = bitcast ptr %tmp3 to ptr + %tmp5 = load <8 x i64>, ptr %tmp4, align 8 %tmp6 = add nsw <8 x i64> %tmp5, - %tmp7 = bitcast i64* %tmp3 to <8 x i64>* - store <8 x i64> %tmp6, <8 x i64>* %tmp7, align 8 + %tmp7 = bitcast ptr %tmp3 to ptr + store <8 x i64> %tmp6, ptr %tmp7, align 8 %tmp8 = add i64 %tmp, 8 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb2 @@ -141,7 +139,7 @@ bb10: ; preds = %bb2 ret void } -define void @bcast_unfold_add_v4i64(i64* %arg) { +define void @bcast_unfold_add_v4i64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_add_v4i64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -161,12 +159,12 @@ bb: bb2: ; preds = %bb2, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb2 ] - %tmp3 = getelementptr inbounds i64, i64* %arg, i64 %tmp - %tmp4 = bitcast i64* %tmp3 to <4 x i64>* - %tmp5 = load <4 x i64>, <4 x i64>* %tmp4, align 8 + %tmp3 = getelementptr inbounds i64, ptr %arg, i64 %tmp + %tmp4 = bitcast ptr %tmp3 to ptr + %tmp5 = load <4 x i64>, ptr %tmp4, align 8 %tmp6 = add nsw <4 x i64> %tmp5, - %tmp7 = bitcast i64* %tmp3 to <4 x i64>* - store <4 x i64> %tmp6, <4 x i64>* %tmp7, align 8 + %tmp7 = bitcast ptr %tmp3 to ptr + store <4 x i64> %tmp6, ptr %tmp7, align 8 %tmp8 = add i64 %tmp, 4 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb2 @@ -175,7 +173,7 @@ bb10: ; preds = %bb2 ret void } -define void @bcast_unfold_add_v2i64(i64* %arg) { +define void @bcast_unfold_add_v2i64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_add_v2i64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -194,12 +192,12 @@ bb: bb2: ; preds = %bb2, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb2 ] - %tmp3 = getelementptr inbounds i64, i64* %arg, i64 %tmp - %tmp4 = bitcast i64* %tmp3 to <2 x i64>* - %tmp5 = load <2 x i64>, <2 x i64>* %tmp4, align 8 + %tmp3 = getelementptr inbounds i64, ptr %arg, i64 %tmp + %tmp4 = bitcast ptr %tmp3 to ptr + %tmp5 = load <2 x i64>, ptr %tmp4, align 8 %tmp6 = add nsw <2 x i64> %tmp5, - %tmp7 = bitcast i64* %tmp3 to <2 x i64>* - store <2 x i64> %tmp6, <2 x i64>* %tmp7, align 8 + %tmp7 = bitcast ptr %tmp3 to ptr + store <2 x i64> %tmp6, ptr %tmp7, align 8 %tmp8 = add i64 %tmp, 2 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb2 @@ -208,7 +206,7 @@ bb10: ; preds = %bb2 ret void } -define void @bcast_unfold_mul_v16i32(i32* %arg) { +define void @bcast_unfold_mul_v16i32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_mul_v16i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -229,12 +227,12 @@ bb: bb2: ; preds = %bb2, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb2 ] - %tmp3 = getelementptr inbounds i32, i32* %arg, i64 %tmp - %tmp4 = bitcast i32* %tmp3 to <16 x i32>* - %tmp5 = load <16 x i32>, <16 x i32>* %tmp4, align 4 + %tmp3 = getelementptr 
inbounds i32, ptr %arg, i64 %tmp + %tmp4 = bitcast ptr %tmp3 to ptr + %tmp5 = load <16 x i32>, ptr %tmp4, align 4 %tmp6 = mul nsw <16 x i32> %tmp5, - %tmp7 = bitcast i32* %tmp3 to <16 x i32>* - store <16 x i32> %tmp6, <16 x i32>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp3 to ptr + store <16 x i32> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 16 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb2 @@ -243,7 +241,7 @@ bb10: ; preds = %bb2 ret void } -define void @bcast_unfold_mul_v8i32(i32* %arg) { +define void @bcast_unfold_mul_v8i32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_mul_v8i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -264,12 +262,12 @@ bb: bb2: ; preds = %bb2, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb2 ] - %tmp3 = getelementptr inbounds i32, i32* %arg, i64 %tmp - %tmp4 = bitcast i32* %tmp3 to <8 x i32>* - %tmp5 = load <8 x i32>, <8 x i32>* %tmp4, align 4 + %tmp3 = getelementptr inbounds i32, ptr %arg, i64 %tmp + %tmp4 = bitcast ptr %tmp3 to ptr + %tmp5 = load <8 x i32>, ptr %tmp4, align 4 %tmp6 = mul nsw <8 x i32> %tmp5, - %tmp7 = bitcast i32* %tmp3 to <8 x i32>* - store <8 x i32> %tmp6, <8 x i32>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp3 to ptr + store <8 x i32> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 8 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb2 @@ -278,7 +276,7 @@ bb10: ; preds = %bb2 ret void } -define void @bcast_unfold_mul_v4i32(i32* %arg) { +define void @bcast_unfold_mul_v4i32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_mul_v4i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -298,12 +296,12 @@ bb: bb2: ; preds = %bb2, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb2 ] - %tmp3 = getelementptr inbounds i32, i32* %arg, i64 %tmp - %tmp4 = bitcast i32* %tmp3 to <4 x i32>* - %tmp5 = load <4 x i32>, <4 x i32>* %tmp4, align 4 + %tmp3 = getelementptr inbounds i32, ptr %arg, i64 %tmp + %tmp4 = bitcast ptr %tmp3 to ptr + %tmp5 = load <4 x i32>, ptr %tmp4, align 4 %tmp6 = mul nsw <4 x i32> %tmp5, - %tmp7 = bitcast i32* %tmp3 to <4 x i32>* - store <4 x i32> %tmp6, <4 x i32>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp3 to ptr + store <4 x i32> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 4 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb2 @@ -312,7 +310,7 @@ bb10: ; preds = %bb2 ret void } -define void @bcast_unfold_mul_v8i64(i64* %arg) { +define void @bcast_unfold_mul_v8i64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_mul_v8i64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -333,12 +331,12 @@ bb: bb2: ; preds = %bb2, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb2 ] - %tmp3 = getelementptr inbounds i64, i64* %arg, i64 %tmp - %tmp4 = bitcast i64* %tmp3 to <8 x i64>* - %tmp5 = load <8 x i64>, <8 x i64>* %tmp4, align 8 + %tmp3 = getelementptr inbounds i64, ptr %arg, i64 %tmp + %tmp4 = bitcast ptr %tmp3 to ptr + %tmp5 = load <8 x i64>, ptr %tmp4, align 8 %tmp6 = mul nsw <8 x i64> %tmp5, - %tmp7 = bitcast i64* %tmp3 to <8 x i64>* - store <8 x i64> %tmp6, <8 x i64>* %tmp7, align 8 + %tmp7 = bitcast ptr %tmp3 to ptr + store <8 x i64> %tmp6, ptr %tmp7, align 8 %tmp8 = add i64 %tmp, 8 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb2 @@ -347,7 +345,7 @@ bb10: ; preds = %bb2 ret void } -define void @bcast_unfold_mul_v4i64(i64* %arg) { +define void @bcast_unfold_mul_v4i64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_mul_v4i64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -368,12 +366,12 @@ bb: bb2: ; preds = 
%bb2, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb2 ] - %tmp3 = getelementptr inbounds i64, i64* %arg, i64 %tmp - %tmp4 = bitcast i64* %tmp3 to <4 x i64>* - %tmp5 = load <4 x i64>, <4 x i64>* %tmp4, align 8 + %tmp3 = getelementptr inbounds i64, ptr %arg, i64 %tmp + %tmp4 = bitcast ptr %tmp3 to ptr + %tmp5 = load <4 x i64>, ptr %tmp4, align 8 %tmp6 = mul nsw <4 x i64> %tmp5, - %tmp7 = bitcast i64* %tmp3 to <4 x i64>* - store <4 x i64> %tmp6, <4 x i64>* %tmp7, align 8 + %tmp7 = bitcast ptr %tmp3 to ptr + store <4 x i64> %tmp6, ptr %tmp7, align 8 %tmp8 = add i64 %tmp, 4 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb2 @@ -382,7 +380,7 @@ bb10: ; preds = %bb2 ret void } -define void @bcast_unfold_mul_v2i64(i64* %arg) { +define void @bcast_unfold_mul_v2i64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_mul_v2i64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -402,12 +400,12 @@ bb: bb2: ; preds = %bb2, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb2 ] - %tmp3 = getelementptr inbounds i64, i64* %arg, i64 %tmp - %tmp4 = bitcast i64* %tmp3 to <2 x i64>* - %tmp5 = load <2 x i64>, <2 x i64>* %tmp4, align 8 + %tmp3 = getelementptr inbounds i64, ptr %arg, i64 %tmp + %tmp4 = bitcast ptr %tmp3 to ptr + %tmp5 = load <2 x i64>, ptr %tmp4, align 8 %tmp6 = mul nsw <2 x i64> %tmp5, - %tmp7 = bitcast i64* %tmp3 to <2 x i64>* - store <2 x i64> %tmp6, <2 x i64>* %tmp7, align 8 + %tmp7 = bitcast ptr %tmp3 to ptr + store <2 x i64> %tmp6, ptr %tmp7, align 8 %tmp8 = add i64 %tmp, 2 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb2 @@ -416,7 +414,7 @@ bb10: ; preds = %bb2 ret void } -define void @bcast_unfold_or_v16i32(i32* %arg) { +define void @bcast_unfold_or_v16i32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_or_v16i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -436,12 +434,12 @@ bb: bb2: ; preds = %bb2, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb2 ] - %tmp3 = getelementptr inbounds i32, i32* %arg, i64 %tmp - %tmp4 = bitcast i32* %tmp3 to <16 x i32>* - %tmp5 = load <16 x i32>, <16 x i32>* %tmp4, align 4 + %tmp3 = getelementptr inbounds i32, ptr %arg, i64 %tmp + %tmp4 = bitcast ptr %tmp3 to ptr + %tmp5 = load <16 x i32>, ptr %tmp4, align 4 %tmp6 = or <16 x i32> %tmp5, - %tmp7 = bitcast i32* %tmp3 to <16 x i32>* - store <16 x i32> %tmp6, <16 x i32>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp3 to ptr + store <16 x i32> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 16 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb2 @@ -450,7 +448,7 @@ bb10: ; preds = %bb2 ret void } -define void @bcast_unfold_or_v8i32(i32* %arg) { +define void @bcast_unfold_or_v8i32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_or_v8i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -470,12 +468,12 @@ bb: bb2: ; preds = %bb2, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb2 ] - %tmp3 = getelementptr inbounds i32, i32* %arg, i64 %tmp - %tmp4 = bitcast i32* %tmp3 to <8 x i32>* - %tmp5 = load <8 x i32>, <8 x i32>* %tmp4, align 4 + %tmp3 = getelementptr inbounds i32, ptr %arg, i64 %tmp + %tmp4 = bitcast ptr %tmp3 to ptr + %tmp5 = load <8 x i32>, ptr %tmp4, align 4 %tmp6 = or <8 x i32> %tmp5, - %tmp7 = bitcast i32* %tmp3 to <8 x i32>* - store <8 x i32> %tmp6, <8 x i32>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp3 to ptr + store <8 x i32> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 8 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb2 @@ -484,7 +482,7 @@ bb10: ; preds = %bb2 ret void } -define void 
@bcast_unfold_or_v4i32(i32* %arg) { +define void @bcast_unfold_or_v4i32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_or_v4i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -503,12 +501,12 @@ bb: bb2: ; preds = %bb2, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb2 ] - %tmp3 = getelementptr inbounds i32, i32* %arg, i64 %tmp - %tmp4 = bitcast i32* %tmp3 to <4 x i32>* - %tmp5 = load <4 x i32>, <4 x i32>* %tmp4, align 4 + %tmp3 = getelementptr inbounds i32, ptr %arg, i64 %tmp + %tmp4 = bitcast ptr %tmp3 to ptr + %tmp5 = load <4 x i32>, ptr %tmp4, align 4 %tmp6 = or <4 x i32> %tmp5, - %tmp7 = bitcast i32* %tmp3 to <4 x i32>* - store <4 x i32> %tmp6, <4 x i32>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp3 to ptr + store <4 x i32> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 4 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb2 @@ -517,7 +515,7 @@ bb10: ; preds = %bb2 ret void } -define void @bcast_unfold_or_v8i64(i64* %arg) { +define void @bcast_unfold_or_v8i64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_or_v8i64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -537,12 +535,12 @@ bb: bb2: ; preds = %bb2, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb2 ] - %tmp3 = getelementptr inbounds i64, i64* %arg, i64 %tmp - %tmp4 = bitcast i64* %tmp3 to <8 x i64>* - %tmp5 = load <8 x i64>, <8 x i64>* %tmp4, align 8 + %tmp3 = getelementptr inbounds i64, ptr %arg, i64 %tmp + %tmp4 = bitcast ptr %tmp3 to ptr + %tmp5 = load <8 x i64>, ptr %tmp4, align 8 %tmp6 = or <8 x i64> %tmp5, - %tmp7 = bitcast i64* %tmp3 to <8 x i64>* - store <8 x i64> %tmp6, <8 x i64>* %tmp7, align 8 + %tmp7 = bitcast ptr %tmp3 to ptr + store <8 x i64> %tmp6, ptr %tmp7, align 8 %tmp8 = add i64 %tmp, 8 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb2 @@ -551,7 +549,7 @@ bb10: ; preds = %bb2 ret void } -define void @bcast_unfold_or_v4i64(i64* %arg) { +define void @bcast_unfold_or_v4i64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_or_v4i64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -571,12 +569,12 @@ bb: bb2: ; preds = %bb2, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb2 ] - %tmp3 = getelementptr inbounds i64, i64* %arg, i64 %tmp - %tmp4 = bitcast i64* %tmp3 to <4 x i64>* - %tmp5 = load <4 x i64>, <4 x i64>* %tmp4, align 8 + %tmp3 = getelementptr inbounds i64, ptr %arg, i64 %tmp + %tmp4 = bitcast ptr %tmp3 to ptr + %tmp5 = load <4 x i64>, ptr %tmp4, align 8 %tmp6 = or <4 x i64> %tmp5, - %tmp7 = bitcast i64* %tmp3 to <4 x i64>* - store <4 x i64> %tmp6, <4 x i64>* %tmp7, align 8 + %tmp7 = bitcast ptr %tmp3 to ptr + store <4 x i64> %tmp6, ptr %tmp7, align 8 %tmp8 = add i64 %tmp, 4 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb2 @@ -585,7 +583,7 @@ bb10: ; preds = %bb2 ret void } -define void @bcast_unfold_or_v2i64(i64* %arg) { +define void @bcast_unfold_or_v2i64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_or_v2i64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -605,12 +603,12 @@ bb: bb2: ; preds = %bb2, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb2 ] - %tmp3 = getelementptr inbounds i64, i64* %arg, i64 %tmp - %tmp4 = bitcast i64* %tmp3 to <2 x i64>* - %tmp5 = load <2 x i64>, <2 x i64>* %tmp4, align 8 + %tmp3 = getelementptr inbounds i64, ptr %arg, i64 %tmp + %tmp4 = bitcast ptr %tmp3 to ptr + %tmp5 = load <2 x i64>, ptr %tmp4, align 8 %tmp6 = or <2 x i64> %tmp5, - %tmp7 = bitcast i64* %tmp3 to <2 x i64>* - store <2 x i64> %tmp6, <2 x i64>* %tmp7, align 8 + %tmp7 = bitcast ptr %tmp3 to ptr + store <2 x 
i64> %tmp6, ptr %tmp7, align 8 %tmp8 = add i64 %tmp, 2 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb2 @@ -619,7 +617,7 @@ bb10: ; preds = %bb2 ret void } -define void @bcast_unfold_fneg_v16f32(float* %arg) { +define void @bcast_unfold_fneg_v16f32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_fneg_v16f32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -639,12 +637,12 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ] - %tmp2 = getelementptr inbounds float, float* %arg, i64 %tmp - %tmp3 = bitcast float* %tmp2 to <16 x float>* - %tmp4 = load <16 x float>, <16 x float>* %tmp3, align 4 + %tmp2 = getelementptr inbounds float, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <16 x float>, ptr %tmp3, align 4 %tmp5 = fneg <16 x float> %tmp4 - %tmp6 = bitcast float* %tmp2 to <16 x float>* - store <16 x float> %tmp5, <16 x float>* %tmp6, align 4 + %tmp6 = bitcast ptr %tmp2 to ptr + store <16 x float> %tmp5, ptr %tmp6, align 4 %tmp7 = add i64 %tmp, 16 %tmp8 = icmp eq i64 %tmp7, 1024 br i1 %tmp8, label %bb9, label %bb1 @@ -653,7 +651,7 @@ bb9: ; preds = %bb1 ret void } -define void @bcast_unfold_fneg_v8f32(float* %arg) { +define void @bcast_unfold_fneg_v8f32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_fneg_v8f32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -673,12 +671,12 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ] - %tmp2 = getelementptr inbounds float, float* %arg, i64 %tmp - %tmp3 = bitcast float* %tmp2 to <8 x float>* - %tmp4 = load <8 x float>, <8 x float>* %tmp3, align 4 + %tmp2 = getelementptr inbounds float, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <8 x float>, ptr %tmp3, align 4 %tmp5 = fneg <8 x float> %tmp4 - %tmp6 = bitcast float* %tmp2 to <8 x float>* - store <8 x float> %tmp5, <8 x float>* %tmp6, align 4 + %tmp6 = bitcast ptr %tmp2 to ptr + store <8 x float> %tmp5, ptr %tmp6, align 4 %tmp7 = add i64 %tmp, 8 %tmp8 = icmp eq i64 %tmp7, 1024 br i1 %tmp8, label %bb9, label %bb1 @@ -687,7 +685,7 @@ bb9: ; preds = %bb1 ret void } -define void @bcast_unfold_fneg_v4f32(float* %arg) { +define void @bcast_unfold_fneg_v4f32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_fneg_v4f32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -706,12 +704,12 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ] - %tmp2 = getelementptr inbounds float, float* %arg, i64 %tmp - %tmp3 = bitcast float* %tmp2 to <4 x float>* - %tmp4 = load <4 x float>, <4 x float>* %tmp3, align 4 + %tmp2 = getelementptr inbounds float, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <4 x float>, ptr %tmp3, align 4 %tmp5 = fneg <4 x float> %tmp4 - %tmp6 = bitcast float* %tmp2 to <4 x float>* - store <4 x float> %tmp5, <4 x float>* %tmp6, align 4 + %tmp6 = bitcast ptr %tmp2 to ptr + store <4 x float> %tmp5, ptr %tmp6, align 4 %tmp7 = add i64 %tmp, 4 %tmp8 = icmp eq i64 %tmp7, 1024 br i1 %tmp8, label %bb9, label %bb1 @@ -720,7 +718,7 @@ bb9: ; preds = %bb1 ret void } -define void @bcast_unfold_fneg_v8f64(double* %arg) { +define void @bcast_unfold_fneg_v8f64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_fneg_v8f64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -740,12 +738,12 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ] - %tmp2 = getelementptr inbounds double, double* %arg, i64 %tmp - %tmp3 = bitcast double* %tmp2 to <8 x double>* - %tmp4 = load <8 x double>, 
<8 x double>* %tmp3, align 8 + %tmp2 = getelementptr inbounds double, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <8 x double>, ptr %tmp3, align 8 %tmp5 = fneg <8 x double> %tmp4 - %tmp6 = bitcast double* %tmp2 to <8 x double>* - store <8 x double> %tmp5, <8 x double>* %tmp6, align 8 + %tmp6 = bitcast ptr %tmp2 to ptr + store <8 x double> %tmp5, ptr %tmp6, align 8 %tmp7 = add i64 %tmp, 8 %tmp8 = icmp eq i64 %tmp7, 1024 br i1 %tmp8, label %bb9, label %bb1 @@ -754,7 +752,7 @@ bb9: ; preds = %bb1 ret void } -define void @bcast_unfold_fneg_v4f64(double* %arg) { +define void @bcast_unfold_fneg_v4f64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_fneg_v4f64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -774,12 +772,12 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ] - %tmp2 = getelementptr inbounds double, double* %arg, i64 %tmp - %tmp3 = bitcast double* %tmp2 to <4 x double>* - %tmp4 = load <4 x double>, <4 x double>* %tmp3, align 8 + %tmp2 = getelementptr inbounds double, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <4 x double>, ptr %tmp3, align 8 %tmp5 = fneg <4 x double> %tmp4 - %tmp6 = bitcast double* %tmp2 to <4 x double>* - store <4 x double> %tmp5, <4 x double>* %tmp6, align 8 + %tmp6 = bitcast ptr %tmp2 to ptr + store <4 x double> %tmp5, ptr %tmp6, align 8 %tmp7 = add i64 %tmp, 4 %tmp8 = icmp eq i64 %tmp7, 1024 br i1 %tmp8, label %bb9, label %bb1 @@ -788,7 +786,7 @@ bb9: ; preds = %bb1 ret void } -define void @bcast_unfold_fneg_v2f64(double* %arg) { +define void @bcast_unfold_fneg_v2f64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_fneg_v2f64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -808,12 +806,12 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ] - %tmp2 = getelementptr inbounds double, double* %arg, i64 %tmp - %tmp3 = bitcast double* %tmp2 to <2 x double>* - %tmp4 = load <2 x double>, <2 x double>* %tmp3, align 8 + %tmp2 = getelementptr inbounds double, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <2 x double>, ptr %tmp3, align 8 %tmp5 = fneg <2 x double> %tmp4 - %tmp6 = bitcast double* %tmp2 to <2 x double>* - store <2 x double> %tmp5, <2 x double>* %tmp6, align 8 + %tmp6 = bitcast ptr %tmp2 to ptr + store <2 x double> %tmp5, ptr %tmp6, align 8 %tmp7 = add i64 %tmp, 2 %tmp8 = icmp eq i64 %tmp7, 1024 br i1 %tmp8, label %bb9, label %bb1 @@ -822,7 +820,7 @@ bb9: ; preds = %bb1 ret void } -define void @bcast_unfold_fabs_v16f32(float* %arg) { +define void @bcast_unfold_fabs_v16f32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_fabs_v16f32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -842,12 +840,12 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ] - %tmp2 = getelementptr inbounds float, float* %arg, i64 %tmp - %tmp3 = bitcast float* %tmp2 to <16 x float>* - %tmp4 = load <16 x float>, <16 x float>* %tmp3, align 4 + %tmp2 = getelementptr inbounds float, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <16 x float>, ptr %tmp3, align 4 %tmp5 = call <16 x float> @llvm.fabs.v16f32(<16 x float> %tmp4) - %tmp6 = bitcast float* %tmp2 to <16 x float>* - store <16 x float> %tmp5, <16 x float>* %tmp6, align 4 + %tmp6 = bitcast ptr %tmp2 to ptr + store <16 x float> %tmp5, ptr %tmp6, align 4 %tmp7 = add i64 %tmp, 16 %tmp8 = icmp eq i64 %tmp7, 1024 br i1 %tmp8, label %bb9, label %bb1 @@ -859,7 +857,7 @@ bb9: ; preds = %bb1 ; Function Attrs: nounwind readnone 
speculatable willreturn declare <16 x float> @llvm.fabs.v16f32(<16 x float>) #0 -define void @bcast_unfold_fabs_v8f32(float* %arg) { +define void @bcast_unfold_fabs_v8f32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_fabs_v8f32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -879,12 +877,12 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ] - %tmp2 = getelementptr inbounds float, float* %arg, i64 %tmp - %tmp3 = bitcast float* %tmp2 to <8 x float>* - %tmp4 = load <8 x float>, <8 x float>* %tmp3, align 4 + %tmp2 = getelementptr inbounds float, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <8 x float>, ptr %tmp3, align 4 %tmp5 = call <8 x float> @llvm.fabs.v8f32(<8 x float> %tmp4) - %tmp6 = bitcast float* %tmp2 to <8 x float>* - store <8 x float> %tmp5, <8 x float>* %tmp6, align 4 + %tmp6 = bitcast ptr %tmp2 to ptr + store <8 x float> %tmp5, ptr %tmp6, align 4 %tmp7 = add i64 %tmp, 8 %tmp8 = icmp eq i64 %tmp7, 1024 br i1 %tmp8, label %bb9, label %bb1 @@ -896,7 +894,7 @@ bb9: ; preds = %bb1 ; Function Attrs: nounwind readnone speculatable willreturn declare <8 x float> @llvm.fabs.v8f32(<8 x float>) #0 -define void @bcast_unfold_fabs_v4f32(float* %arg) { +define void @bcast_unfold_fabs_v4f32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_fabs_v4f32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -915,12 +913,12 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ] - %tmp2 = getelementptr inbounds float, float* %arg, i64 %tmp - %tmp3 = bitcast float* %tmp2 to <4 x float>* - %tmp4 = load <4 x float>, <4 x float>* %tmp3, align 4 + %tmp2 = getelementptr inbounds float, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <4 x float>, ptr %tmp3, align 4 %tmp5 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %tmp4) - %tmp6 = bitcast float* %tmp2 to <4 x float>* - store <4 x float> %tmp5, <4 x float>* %tmp6, align 4 + %tmp6 = bitcast ptr %tmp2 to ptr + store <4 x float> %tmp5, ptr %tmp6, align 4 %tmp7 = add i64 %tmp, 4 %tmp8 = icmp eq i64 %tmp7, 1024 br i1 %tmp8, label %bb9, label %bb1 @@ -932,7 +930,7 @@ bb9: ; preds = %bb1 ; Function Attrs: nounwind readnone speculatable willreturn declare <4 x float> @llvm.fabs.v4f32(<4 x float>) #0 -define void @bcast_unfold_fabs_v8f64(double* %arg) { +define void @bcast_unfold_fabs_v8f64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_fabs_v8f64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -952,12 +950,12 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ] - %tmp2 = getelementptr inbounds double, double* %arg, i64 %tmp - %tmp3 = bitcast double* %tmp2 to <8 x double>* - %tmp4 = load <8 x double>, <8 x double>* %tmp3, align 8 + %tmp2 = getelementptr inbounds double, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <8 x double>, ptr %tmp3, align 8 %tmp5 = call <8 x double> @llvm.fabs.v8f64(<8 x double> %tmp4) - %tmp6 = bitcast double* %tmp2 to <8 x double>* - store <8 x double> %tmp5, <8 x double>* %tmp6, align 8 + %tmp6 = bitcast ptr %tmp2 to ptr + store <8 x double> %tmp5, ptr %tmp6, align 8 %tmp7 = add i64 %tmp, 8 %tmp8 = icmp eq i64 %tmp7, 1024 br i1 %tmp8, label %bb9, label %bb1 @@ -969,7 +967,7 @@ bb9: ; preds = %bb1 ; Function Attrs: nounwind readnone speculatable willreturn declare <8 x double> @llvm.fabs.v8f64(<8 x double>) #0 -define void @bcast_unfold_fabs_v4f64(double* %arg) { +define void @bcast_unfold_fabs_v4f64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_fabs_v4f64: ; 
CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -989,12 +987,12 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ] - %tmp2 = getelementptr inbounds double, double* %arg, i64 %tmp - %tmp3 = bitcast double* %tmp2 to <4 x double>* - %tmp4 = load <4 x double>, <4 x double>* %tmp3, align 8 + %tmp2 = getelementptr inbounds double, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <4 x double>, ptr %tmp3, align 8 %tmp5 = call <4 x double> @llvm.fabs.v4f64(<4 x double> %tmp4) - %tmp6 = bitcast double* %tmp2 to <4 x double>* - store <4 x double> %tmp5, <4 x double>* %tmp6, align 8 + %tmp6 = bitcast ptr %tmp2 to ptr + store <4 x double> %tmp5, ptr %tmp6, align 8 %tmp7 = add i64 %tmp, 4 %tmp8 = icmp eq i64 %tmp7, 1024 br i1 %tmp8, label %bb9, label %bb1 @@ -1006,7 +1004,7 @@ bb9: ; preds = %bb1 ; Function Attrs: nounwind readnone speculatable willreturn declare <4 x double> @llvm.fabs.v4f64(<4 x double>) #0 -define void @bcast_unfold_fabs_v2f64(double* %arg) { +define void @bcast_unfold_fabs_v2f64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_fabs_v2f64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -1026,12 +1024,12 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ] - %tmp2 = getelementptr inbounds double, double* %arg, i64 %tmp - %tmp3 = bitcast double* %tmp2 to <2 x double>* - %tmp4 = load <2 x double>, <2 x double>* %tmp3, align 8 + %tmp2 = getelementptr inbounds double, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <2 x double>, ptr %tmp3, align 8 %tmp5 = call <2 x double> @llvm.fabs.v2f64(<2 x double> %tmp4) - %tmp6 = bitcast double* %tmp2 to <2 x double>* - store <2 x double> %tmp5, <2 x double>* %tmp6, align 8 + %tmp6 = bitcast ptr %tmp2 to ptr + store <2 x double> %tmp5, ptr %tmp6, align 8 %tmp7 = add i64 %tmp, 2 %tmp8 = icmp eq i64 %tmp7, 1024 br i1 %tmp8, label %bb9, label %bb1 @@ -1043,7 +1041,7 @@ bb9: ; preds = %bb1 ; Function Attrs: nounwind readnone speculatable willreturn declare <2 x double> @llvm.fabs.v2f64(<2 x double>) #0 -define void @bcast_unfold_fadd_v16f32(float* nocapture %arg) { +define void @bcast_unfold_fadd_v16f32(ptr nocapture %arg) { ; CHECK-LABEL: bcast_unfold_fadd_v16f32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -1063,12 +1061,12 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ] - %tmp2 = getelementptr inbounds float, float* %arg, i64 %tmp - %tmp3 = bitcast float* %tmp2 to <16 x float>* - %tmp4 = load <16 x float>, <16 x float>* %tmp3, align 4 + %tmp2 = getelementptr inbounds float, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <16 x float>, ptr %tmp3, align 4 %tmp5 = fadd <16 x float> %tmp4, - %tmp6 = bitcast float* %tmp2 to <16 x float>* - store <16 x float> %tmp5, <16 x float>* %tmp6, align 4 + %tmp6 = bitcast ptr %tmp2 to ptr + store <16 x float> %tmp5, ptr %tmp6, align 4 %tmp7 = add i64 %tmp, 16 %tmp8 = icmp eq i64 %tmp7, 1024 br i1 %tmp8, label %bb9, label %bb1 @@ -1077,7 +1075,7 @@ bb9: ; preds = %bb1 ret void } -define void @bcast_unfold_fadd_v8f32(float* nocapture %arg) { +define void @bcast_unfold_fadd_v8f32(ptr nocapture %arg) { ; CHECK-LABEL: bcast_unfold_fadd_v8f32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -1097,12 +1095,12 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ] - %tmp2 = getelementptr inbounds float, float* %arg, i64 %tmp - %tmp3 = bitcast float* %tmp2 to <8 x float>* - 
%tmp4 = load <8 x float>, <8 x float>* %tmp3, align 4 + %tmp2 = getelementptr inbounds float, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <8 x float>, ptr %tmp3, align 4 %tmp5 = fadd <8 x float> %tmp4, - %tmp6 = bitcast float* %tmp2 to <8 x float>* - store <8 x float> %tmp5, <8 x float>* %tmp6, align 4 + %tmp6 = bitcast ptr %tmp2 to ptr + store <8 x float> %tmp5, ptr %tmp6, align 4 %tmp7 = add i64 %tmp, 8 %tmp8 = icmp eq i64 %tmp7, 1024 br i1 %tmp8, label %bb9, label %bb1 @@ -1111,7 +1109,7 @@ bb9: ; preds = %bb1 ret void } -define void @bcast_unfold_fadd_v4f32(float* nocapture %arg) { +define void @bcast_unfold_fadd_v4f32(ptr nocapture %arg) { ; CHECK-LABEL: bcast_unfold_fadd_v4f32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -1130,12 +1128,12 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ] - %tmp2 = getelementptr inbounds float, float* %arg, i64 %tmp - %tmp3 = bitcast float* %tmp2 to <4 x float>* - %tmp4 = load <4 x float>, <4 x float>* %tmp3, align 4 + %tmp2 = getelementptr inbounds float, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <4 x float>, ptr %tmp3, align 4 %tmp5 = fadd <4 x float> %tmp4, - %tmp6 = bitcast float* %tmp2 to <4 x float>* - store <4 x float> %tmp5, <4 x float>* %tmp6, align 4 + %tmp6 = bitcast ptr %tmp2 to ptr + store <4 x float> %tmp5, ptr %tmp6, align 4 %tmp7 = add i64 %tmp, 4 %tmp8 = icmp eq i64 %tmp7, 1024 br i1 %tmp8, label %bb9, label %bb1 @@ -1144,7 +1142,7 @@ bb9: ; preds = %bb1 ret void } -define void @bcast_unfold_fadd_v8f64(double* nocapture %arg) { +define void @bcast_unfold_fadd_v8f64(ptr nocapture %arg) { ; CHECK-LABEL: bcast_unfold_fadd_v8f64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -1164,12 +1162,12 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ] - %tmp2 = getelementptr inbounds double, double* %arg, i64 %tmp - %tmp3 = bitcast double* %tmp2 to <8 x double>* - %tmp4 = load <8 x double>, <8 x double>* %tmp3, align 8 + %tmp2 = getelementptr inbounds double, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <8 x double>, ptr %tmp3, align 8 %tmp5 = fadd <8 x double> %tmp4, - %tmp6 = bitcast double* %tmp2 to <8 x double>* - store <8 x double> %tmp5, <8 x double>* %tmp6, align 8 + %tmp6 = bitcast ptr %tmp2 to ptr + store <8 x double> %tmp5, ptr %tmp6, align 8 %tmp7 = add i64 %tmp, 8 %tmp8 = icmp eq i64 %tmp7, 1024 br i1 %tmp8, label %bb9, label %bb1 @@ -1178,7 +1176,7 @@ bb9: ; preds = %bb1 ret void } -define void @bcast_unfold_fadd_v4f64(double* nocapture %arg) { +define void @bcast_unfold_fadd_v4f64(ptr nocapture %arg) { ; CHECK-LABEL: bcast_unfold_fadd_v4f64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -1198,12 +1196,12 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ] - %tmp2 = getelementptr inbounds double, double* %arg, i64 %tmp - %tmp3 = bitcast double* %tmp2 to <4 x double>* - %tmp4 = load <4 x double>, <4 x double>* %tmp3, align 8 + %tmp2 = getelementptr inbounds double, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <4 x double>, ptr %tmp3, align 8 %tmp5 = fadd <4 x double> %tmp4, - %tmp6 = bitcast double* %tmp2 to <4 x double>* - store <4 x double> %tmp5, <4 x double>* %tmp6, align 8 + %tmp6 = bitcast ptr %tmp2 to ptr + store <4 x double> %tmp5, ptr %tmp6, align 8 %tmp7 = add i64 %tmp, 4 %tmp8 = icmp eq i64 %tmp7, 1024 br i1 %tmp8, label %bb9, label %bb1 @@ -1212,7 +1210,7 @@ bb9: ; preds 
= %bb1 ret void } -define void @bcast_unfold_fadd_v2f64(double* nocapture %arg) { +define void @bcast_unfold_fadd_v2f64(ptr nocapture %arg) { ; CHECK-LABEL: bcast_unfold_fadd_v2f64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -1232,12 +1230,12 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ] - %tmp2 = getelementptr inbounds double, double* %arg, i64 %tmp - %tmp3 = bitcast double* %tmp2 to <2 x double>* - %tmp4 = load <2 x double>, <2 x double>* %tmp3, align 8 + %tmp2 = getelementptr inbounds double, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <2 x double>, ptr %tmp3, align 8 %tmp5 = fadd <2 x double> %tmp4, - %tmp6 = bitcast double* %tmp2 to <2 x double>* - store <2 x double> %tmp5, <2 x double>* %tmp6, align 8 + %tmp6 = bitcast ptr %tmp2 to ptr + store <2 x double> %tmp5, ptr %tmp6, align 8 %tmp7 = add i64 %tmp, 2 %tmp8 = icmp eq i64 %tmp7, 1024 br i1 %tmp8, label %bb9, label %bb1 @@ -1246,7 +1244,7 @@ bb9: ; preds = %bb1 ret void } -define void @bcast_unfold_fmul_v16f32(float* nocapture %arg) { +define void @bcast_unfold_fmul_v16f32(ptr nocapture %arg) { ; CHECK-LABEL: bcast_unfold_fmul_v16f32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -1266,12 +1264,12 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ] - %tmp2 = getelementptr inbounds float, float* %arg, i64 %tmp - %tmp3 = bitcast float* %tmp2 to <16 x float>* - %tmp4 = load <16 x float>, <16 x float>* %tmp3, align 4 + %tmp2 = getelementptr inbounds float, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <16 x float>, ptr %tmp3, align 4 %tmp5 = fmul <16 x float> %tmp4, - %tmp6 = bitcast float* %tmp2 to <16 x float>* - store <16 x float> %tmp5, <16 x float>* %tmp6, align 4 + %tmp6 = bitcast ptr %tmp2 to ptr + store <16 x float> %tmp5, ptr %tmp6, align 4 %tmp7 = add i64 %tmp, 16 %tmp8 = icmp eq i64 %tmp7, 1024 br i1 %tmp8, label %bb9, label %bb1 @@ -1280,7 +1278,7 @@ bb9: ; preds = %bb1 ret void } -define void @bcast_unfold_fmul_v8f32(float* nocapture %arg) { +define void @bcast_unfold_fmul_v8f32(ptr nocapture %arg) { ; CHECK-LABEL: bcast_unfold_fmul_v8f32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -1300,12 +1298,12 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ] - %tmp2 = getelementptr inbounds float, float* %arg, i64 %tmp - %tmp3 = bitcast float* %tmp2 to <8 x float>* - %tmp4 = load <8 x float>, <8 x float>* %tmp3, align 4 + %tmp2 = getelementptr inbounds float, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <8 x float>, ptr %tmp3, align 4 %tmp5 = fmul <8 x float> %tmp4, - %tmp6 = bitcast float* %tmp2 to <8 x float>* - store <8 x float> %tmp5, <8 x float>* %tmp6, align 4 + %tmp6 = bitcast ptr %tmp2 to ptr + store <8 x float> %tmp5, ptr %tmp6, align 4 %tmp7 = add i64 %tmp, 8 %tmp8 = icmp eq i64 %tmp7, 1024 br i1 %tmp8, label %bb9, label %bb1 @@ -1314,7 +1312,7 @@ bb9: ; preds = %bb1 ret void } -define void @bcast_unfold_fmul_v4f32(float* nocapture %arg) { +define void @bcast_unfold_fmul_v4f32(ptr nocapture %arg) { ; CHECK-LABEL: bcast_unfold_fmul_v4f32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -1333,12 +1331,12 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ] - %tmp2 = getelementptr inbounds float, float* %arg, i64 %tmp - %tmp3 = bitcast float* %tmp2 to <4 x float>* - %tmp4 = load <4 x float>, <4 x float>* %tmp3, align 4 + %tmp2 = getelementptr 
inbounds float, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <4 x float>, ptr %tmp3, align 4 %tmp5 = fmul <4 x float> %tmp4, - %tmp6 = bitcast float* %tmp2 to <4 x float>* - store <4 x float> %tmp5, <4 x float>* %tmp6, align 4 + %tmp6 = bitcast ptr %tmp2 to ptr + store <4 x float> %tmp5, ptr %tmp6, align 4 %tmp7 = add i64 %tmp, 4 %tmp8 = icmp eq i64 %tmp7, 1024 br i1 %tmp8, label %bb9, label %bb1 @@ -1347,7 +1345,7 @@ bb9: ; preds = %bb1 ret void } -define void @bcast_unfold_fmul_v8f64(double* nocapture %arg) { +define void @bcast_unfold_fmul_v8f64(ptr nocapture %arg) { ; CHECK-LABEL: bcast_unfold_fmul_v8f64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -1367,12 +1365,12 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ] - %tmp2 = getelementptr inbounds double, double* %arg, i64 %tmp - %tmp3 = bitcast double* %tmp2 to <8 x double>* - %tmp4 = load <8 x double>, <8 x double>* %tmp3, align 8 + %tmp2 = getelementptr inbounds double, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <8 x double>, ptr %tmp3, align 8 %tmp5 = fmul <8 x double> %tmp4, - %tmp6 = bitcast double* %tmp2 to <8 x double>* - store <8 x double> %tmp5, <8 x double>* %tmp6, align 8 + %tmp6 = bitcast ptr %tmp2 to ptr + store <8 x double> %tmp5, ptr %tmp6, align 8 %tmp7 = add i64 %tmp, 8 %tmp8 = icmp eq i64 %tmp7, 1024 br i1 %tmp8, label %bb9, label %bb1 @@ -1381,7 +1379,7 @@ bb9: ; preds = %bb1 ret void } -define void @bcast_unfold_fmul_v4f64(double* nocapture %arg) { +define void @bcast_unfold_fmul_v4f64(ptr nocapture %arg) { ; CHECK-LABEL: bcast_unfold_fmul_v4f64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -1401,12 +1399,12 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ] - %tmp2 = getelementptr inbounds double, double* %arg, i64 %tmp - %tmp3 = bitcast double* %tmp2 to <4 x double>* - %tmp4 = load <4 x double>, <4 x double>* %tmp3, align 8 + %tmp2 = getelementptr inbounds double, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <4 x double>, ptr %tmp3, align 8 %tmp5 = fmul <4 x double> %tmp4, - %tmp6 = bitcast double* %tmp2 to <4 x double>* - store <4 x double> %tmp5, <4 x double>* %tmp6, align 8 + %tmp6 = bitcast ptr %tmp2 to ptr + store <4 x double> %tmp5, ptr %tmp6, align 8 %tmp7 = add i64 %tmp, 4 %tmp8 = icmp eq i64 %tmp7, 1024 br i1 %tmp8, label %bb9, label %bb1 @@ -1415,7 +1413,7 @@ bb9: ; preds = %bb1 ret void } -define void @bcast_unfold_fmul_v2f64(double* nocapture %arg) { +define void @bcast_unfold_fmul_v2f64(ptr nocapture %arg) { ; CHECK-LABEL: bcast_unfold_fmul_v2f64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -1435,12 +1433,12 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ] - %tmp2 = getelementptr inbounds double, double* %arg, i64 %tmp - %tmp3 = bitcast double* %tmp2 to <2 x double>* - %tmp4 = load <2 x double>, <2 x double>* %tmp3, align 8 + %tmp2 = getelementptr inbounds double, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <2 x double>, ptr %tmp3, align 8 %tmp5 = fmul <2 x double> %tmp4, - %tmp6 = bitcast double* %tmp2 to <2 x double>* - store <2 x double> %tmp5, <2 x double>* %tmp6, align 8 + %tmp6 = bitcast ptr %tmp2 to ptr + store <2 x double> %tmp5, ptr %tmp6, align 8 %tmp7 = add i64 %tmp, 2 %tmp8 = icmp eq i64 %tmp7, 1024 br i1 %tmp8, label %bb9, label %bb1 @@ -1449,7 +1447,7 @@ bb9: ; preds = %bb1 ret void } -define void 
@bcast_unfold_fdiv_v16f32(float* nocapture %arg) { +define void @bcast_unfold_fdiv_v16f32(ptr nocapture %arg) { ; CHECK-LABEL: bcast_unfold_fdiv_v16f32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -1470,12 +1468,12 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ] - %tmp2 = getelementptr inbounds float, float* %arg, i64 %tmp - %tmp3 = bitcast float* %tmp2 to <16 x float>* - %tmp4 = load <16 x float>, <16 x float>* %tmp3, align 4 + %tmp2 = getelementptr inbounds float, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <16 x float>, ptr %tmp3, align 4 %tmp5 = fdiv <16 x float> %tmp4, - %tmp6 = bitcast float* %tmp2 to <16 x float>* - store <16 x float> %tmp5, <16 x float>* %tmp6, align 4 + %tmp6 = bitcast ptr %tmp2 to ptr + store <16 x float> %tmp5, ptr %tmp6, align 4 %tmp7 = add i64 %tmp, 16 %tmp8 = icmp eq i64 %tmp7, 1024 br i1 %tmp8, label %bb9, label %bb1 @@ -1484,7 +1482,7 @@ bb9: ; preds = %bb1 ret void } -define void @bcast_unfold_fdiv_v8f32(float* nocapture %arg) { +define void @bcast_unfold_fdiv_v8f32(ptr nocapture %arg) { ; CHECK-LABEL: bcast_unfold_fdiv_v8f32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -1505,12 +1503,12 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ] - %tmp2 = getelementptr inbounds float, float* %arg, i64 %tmp - %tmp3 = bitcast float* %tmp2 to <8 x float>* - %tmp4 = load <8 x float>, <8 x float>* %tmp3, align 4 + %tmp2 = getelementptr inbounds float, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <8 x float>, ptr %tmp3, align 4 %tmp5 = fdiv <8 x float> %tmp4, - %tmp6 = bitcast float* %tmp2 to <8 x float>* - store <8 x float> %tmp5, <8 x float>* %tmp6, align 4 + %tmp6 = bitcast ptr %tmp2 to ptr + store <8 x float> %tmp5, ptr %tmp6, align 4 %tmp7 = add i64 %tmp, 8 %tmp8 = icmp eq i64 %tmp7, 1024 br i1 %tmp8, label %bb9, label %bb1 @@ -1519,7 +1517,7 @@ bb9: ; preds = %bb1 ret void } -define void @bcast_unfold_fdiv_v4f32(float* nocapture %arg) { +define void @bcast_unfold_fdiv_v4f32(ptr nocapture %arg) { ; CHECK-LABEL: bcast_unfold_fdiv_v4f32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -1539,12 +1537,12 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ] - %tmp2 = getelementptr inbounds float, float* %arg, i64 %tmp - %tmp3 = bitcast float* %tmp2 to <4 x float>* - %tmp4 = load <4 x float>, <4 x float>* %tmp3, align 4 + %tmp2 = getelementptr inbounds float, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <4 x float>, ptr %tmp3, align 4 %tmp5 = fdiv <4 x float> %tmp4, - %tmp6 = bitcast float* %tmp2 to <4 x float>* - store <4 x float> %tmp5, <4 x float>* %tmp6, align 4 + %tmp6 = bitcast ptr %tmp2 to ptr + store <4 x float> %tmp5, ptr %tmp6, align 4 %tmp7 = add i64 %tmp, 4 %tmp8 = icmp eq i64 %tmp7, 1024 br i1 %tmp8, label %bb9, label %bb1 @@ -1553,7 +1551,7 @@ bb9: ; preds = %bb1 ret void } -define void @bcast_unfold_fdiv_v8f64(double* nocapture %arg) { +define void @bcast_unfold_fdiv_v8f64(ptr nocapture %arg) { ; CHECK-LABEL: bcast_unfold_fdiv_v8f64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -1574,12 +1572,12 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ] - %tmp2 = getelementptr inbounds double, double* %arg, i64 %tmp - %tmp3 = bitcast double* %tmp2 to <8 x double>* - %tmp4 = load <8 x double>, <8 x double>* %tmp3, align 8 + %tmp2 = getelementptr inbounds double, ptr %arg, i64 %tmp + 
%tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <8 x double>, ptr %tmp3, align 8 %tmp5 = fdiv <8 x double> %tmp4, - %tmp6 = bitcast double* %tmp2 to <8 x double>* - store <8 x double> %tmp5, <8 x double>* %tmp6, align 8 + %tmp6 = bitcast ptr %tmp2 to ptr + store <8 x double> %tmp5, ptr %tmp6, align 8 %tmp7 = add i64 %tmp, 8 %tmp8 = icmp eq i64 %tmp7, 1024 br i1 %tmp8, label %bb9, label %bb1 @@ -1588,7 +1586,7 @@ bb9: ; preds = %bb1 ret void } -define void @bcast_unfold_fdiv_v4f64(double* nocapture %arg) { +define void @bcast_unfold_fdiv_v4f64(ptr nocapture %arg) { ; CHECK-LABEL: bcast_unfold_fdiv_v4f64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -1609,12 +1607,12 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ] - %tmp2 = getelementptr inbounds double, double* %arg, i64 %tmp - %tmp3 = bitcast double* %tmp2 to <4 x double>* - %tmp4 = load <4 x double>, <4 x double>* %tmp3, align 8 + %tmp2 = getelementptr inbounds double, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <4 x double>, ptr %tmp3, align 8 %tmp5 = fdiv <4 x double> %tmp4, - %tmp6 = bitcast double* %tmp2 to <4 x double>* - store <4 x double> %tmp5, <4 x double>* %tmp6, align 8 + %tmp6 = bitcast ptr %tmp2 to ptr + store <4 x double> %tmp5, ptr %tmp6, align 8 %tmp7 = add i64 %tmp, 4 %tmp8 = icmp eq i64 %tmp7, 1024 br i1 %tmp8, label %bb9, label %bb1 @@ -1623,7 +1621,7 @@ bb9: ; preds = %bb1 ret void } -define void @bcast_unfold_fdiv_v2f64(double* nocapture %arg) { +define void @bcast_unfold_fdiv_v2f64(ptr nocapture %arg) { ; CHECK-LABEL: bcast_unfold_fdiv_v2f64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -1644,12 +1642,12 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp7, %bb1 ] - %tmp2 = getelementptr inbounds double, double* %arg, i64 %tmp - %tmp3 = bitcast double* %tmp2 to <2 x double>* - %tmp4 = load <2 x double>, <2 x double>* %tmp3, align 8 + %tmp2 = getelementptr inbounds double, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <2 x double>, ptr %tmp3, align 8 %tmp5 = fdiv <2 x double> %tmp4, - %tmp6 = bitcast double* %tmp2 to <2 x double>* - store <2 x double> %tmp5, <2 x double>* %tmp6, align 8 + %tmp6 = bitcast ptr %tmp2 to ptr + store <2 x double> %tmp5, ptr %tmp6, align 8 %tmp7 = add i64 %tmp, 2 %tmp8 = icmp eq i64 %tmp7, 1024 br i1 %tmp8, label %bb9, label %bb1 @@ -1658,7 +1656,7 @@ bb9: ; preds = %bb1 ret void } -define void @bcast_unfold_fma213_v4f32(float* %arg) { +define void @bcast_unfold_fma213_v4f32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_fma213_v4f32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -1678,13 +1676,13 @@ bb: bb2: ; preds = %bb2, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp9, %bb2 ] - %tmp3 = getelementptr inbounds float, float* %arg, i64 %tmp - %tmp4 = bitcast float* %tmp3 to <4 x float>* - %tmp5 = load <4 x float>, <4 x float>* %tmp4, align 4 + %tmp3 = getelementptr inbounds float, ptr %arg, i64 %tmp + %tmp4 = bitcast ptr %tmp3 to ptr + %tmp5 = load <4 x float>, ptr %tmp4, align 4 %tmp6 = fmul contract <4 x float> %tmp5, %tmp5 %tmp7 = fadd contract <4 x float> %tmp6, - %tmp8 = bitcast float* %tmp3 to <4 x float>* - store <4 x float> %tmp7, <4 x float>* %tmp8, align 4 + %tmp8 = bitcast ptr %tmp3 to ptr + store <4 x float> %tmp7, ptr %tmp8, align 4 %tmp9 = add i64 %tmp, 4 %tmp10 = icmp eq i64 %tmp9, 1024 br i1 %tmp10, label %bb11, label %bb2 @@ -1693,7 +1691,7 @@ bb11: ; preds = %bb2 ret void } -define void 
@bcast_unfold_fma231_v4f32(float* %arg) { +define void @bcast_unfold_fma231_v4f32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_fma231_v4f32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -1713,13 +1711,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds float, float* %arg, i64 %tmp - %tmp3 = bitcast float* %tmp2 to <4 x float>* - %tmp4 = load <4 x float>, <4 x float>* %tmp3, align 4 + %tmp2 = getelementptr inbounds float, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <4 x float>, ptr %tmp3, align 4 %tmp5 = fmul contract <4 x float> %tmp4, %tmp6 = fadd contract <4 x float> %tmp4, %tmp5 - %tmp7 = bitcast float* %tmp2 to <4 x float>* - store <4 x float> %tmp6, <4 x float>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <4 x float> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 4 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -1728,7 +1726,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_fma213_v8f32(float* %arg) { +define void @bcast_unfold_fma213_v8f32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_fma213_v8f32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -1749,13 +1747,13 @@ bb: bb2: ; preds = %bb2, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp9, %bb2 ] - %tmp3 = getelementptr inbounds float, float* %arg, i64 %tmp - %tmp4 = bitcast float* %tmp3 to <8 x float>* - %tmp5 = load <8 x float>, <8 x float>* %tmp4, align 4 + %tmp3 = getelementptr inbounds float, ptr %arg, i64 %tmp + %tmp4 = bitcast ptr %tmp3 to ptr + %tmp5 = load <8 x float>, ptr %tmp4, align 4 %tmp6 = fmul contract <8 x float> %tmp5, %tmp5 %tmp7 = fadd contract <8 x float> %tmp6, - %tmp8 = bitcast float* %tmp3 to <8 x float>* - store <8 x float> %tmp7, <8 x float>* %tmp8, align 4 + %tmp8 = bitcast ptr %tmp3 to ptr + store <8 x float> %tmp7, ptr %tmp8, align 4 %tmp9 = add i64 %tmp, 8 %tmp10 = icmp eq i64 %tmp9, 1024 br i1 %tmp10, label %bb11, label %bb2 @@ -1764,7 +1762,7 @@ bb11: ; preds = %bb2 ret void } -define void @bcast_unfold_fma231_v8f32(float* %arg) { +define void @bcast_unfold_fma231_v8f32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_fma231_v8f32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -1785,13 +1783,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds float, float* %arg, i64 %tmp - %tmp3 = bitcast float* %tmp2 to <8 x float>* - %tmp4 = load <8 x float>, <8 x float>* %tmp3, align 4 + %tmp2 = getelementptr inbounds float, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <8 x float>, ptr %tmp3, align 4 %tmp5 = fmul contract <8 x float> %tmp4, %tmp6 = fadd contract <8 x float> %tmp4, %tmp5 - %tmp7 = bitcast float* %tmp2 to <8 x float>* - store <8 x float> %tmp6, <8 x float>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <8 x float> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 8 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -1800,7 +1798,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_fma213_v16f32(float* %arg) { +define void @bcast_unfold_fma213_v16f32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_fma213_v16f32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -1821,13 +1819,13 @@ bb: bb2: ; preds = %bb2, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp9, %bb2 ] - %tmp3 = getelementptr inbounds float, float* %arg, i64 %tmp - %tmp4 = bitcast float* %tmp3 to <16 x float>* - %tmp5 = load 
<16 x float>, <16 x float>* %tmp4, align 4 + %tmp3 = getelementptr inbounds float, ptr %arg, i64 %tmp + %tmp4 = bitcast ptr %tmp3 to ptr + %tmp5 = load <16 x float>, ptr %tmp4, align 4 %tmp6 = fmul contract <16 x float> %tmp5, %tmp5 %tmp7 = fadd contract <16 x float> %tmp6, - %tmp8 = bitcast float* %tmp3 to <16 x float>* - store <16 x float> %tmp7, <16 x float>* %tmp8, align 4 + %tmp8 = bitcast ptr %tmp3 to ptr + store <16 x float> %tmp7, ptr %tmp8, align 4 %tmp9 = add i64 %tmp, 16 %tmp10 = icmp eq i64 %tmp9, 1024 br i1 %tmp10, label %bb11, label %bb2 @@ -1836,7 +1834,7 @@ bb11: ; preds = %bb2 ret void } -define void @bcast_unfold_fma231_v16f32(float* %arg) { +define void @bcast_unfold_fma231_v16f32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_fma231_v16f32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -1857,13 +1855,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds float, float* %arg, i64 %tmp - %tmp3 = bitcast float* %tmp2 to <16 x float>* - %tmp4 = load <16 x float>, <16 x float>* %tmp3, align 4 + %tmp2 = getelementptr inbounds float, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <16 x float>, ptr %tmp3, align 4 %tmp5 = fmul contract <16 x float> %tmp4, %tmp6 = fadd contract <16 x float> %tmp4, %tmp5 - %tmp7 = bitcast float* %tmp2 to <16 x float>* - store <16 x float> %tmp6, <16 x float>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <16 x float> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 16 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -1872,7 +1870,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_fma213_v2f64(double* %arg) { +define void @bcast_unfold_fma213_v2f64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_fma213_v2f64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -1893,13 +1891,13 @@ bb: bb2: ; preds = %bb2, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp9, %bb2 ] - %tmp3 = getelementptr inbounds double, double* %arg, i64 %tmp - %tmp4 = bitcast double* %tmp3 to <2 x double>* - %tmp5 = load <2 x double>, <2 x double>* %tmp4, align 4 + %tmp3 = getelementptr inbounds double, ptr %arg, i64 %tmp + %tmp4 = bitcast ptr %tmp3 to ptr + %tmp5 = load <2 x double>, ptr %tmp4, align 4 %tmp6 = fmul contract <2 x double> %tmp5, %tmp5 %tmp7 = fadd contract <2 x double> %tmp6, - %tmp8 = bitcast double* %tmp3 to <2 x double>* - store <2 x double> %tmp7, <2 x double>* %tmp8, align 8 + %tmp8 = bitcast ptr %tmp3 to ptr + store <2 x double> %tmp7, ptr %tmp8, align 8 %tmp9 = add i64 %tmp, 2 %tmp10 = icmp eq i64 %tmp9, 1024 br i1 %tmp10, label %bb11, label %bb2 @@ -1908,7 +1906,7 @@ bb11: ; preds = %bb2 ret void } -define void @bcast_unfold_fma231_v2f64(double* %arg) { +define void @bcast_unfold_fma231_v2f64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_fma231_v2f64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -1929,13 +1927,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds double, double* %arg, i64 %tmp - %tmp3 = bitcast double* %tmp2 to <2 x double>* - %tmp4 = load <2 x double>, <2 x double>* %tmp3, align 8 + %tmp2 = getelementptr inbounds double, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <2 x double>, ptr %tmp3, align 8 %tmp5 = fmul contract <2 x double> %tmp4, %tmp6 = fadd contract <2 x double> %tmp4, %tmp5 - %tmp7 = bitcast double* %tmp2 to <2 x double>* - store <2 x double> %tmp6, <2 x double>* %tmp7, align 8 + 
%tmp7 = bitcast ptr %tmp2 to ptr + store <2 x double> %tmp6, ptr %tmp7, align 8 %tmp8 = add i64 %tmp, 2 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -1944,7 +1942,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_fma213_v4f64(double* %arg) { +define void @bcast_unfold_fma213_v4f64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_fma213_v4f64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -1965,13 +1963,13 @@ bb: bb2: ; preds = %bb2, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp9, %bb2 ] - %tmp3 = getelementptr inbounds double, double* %arg, i64 %tmp - %tmp4 = bitcast double* %tmp3 to <4 x double>* - %tmp5 = load <4 x double>, <4 x double>* %tmp4, align 8 + %tmp3 = getelementptr inbounds double, ptr %arg, i64 %tmp + %tmp4 = bitcast ptr %tmp3 to ptr + %tmp5 = load <4 x double>, ptr %tmp4, align 8 %tmp6 = fmul contract <4 x double> %tmp5, %tmp5 %tmp7 = fadd contract <4 x double> %tmp6, - %tmp8 = bitcast double* %tmp3 to <4 x double>* - store <4 x double> %tmp7, <4 x double>* %tmp8, align 8 + %tmp8 = bitcast ptr %tmp3 to ptr + store <4 x double> %tmp7, ptr %tmp8, align 8 %tmp9 = add i64 %tmp, 4 %tmp10 = icmp eq i64 %tmp9, 1024 br i1 %tmp10, label %bb11, label %bb2 @@ -1980,7 +1978,7 @@ bb11: ; preds = %bb2 ret void } -define void @bcast_unfold_fma231_v4f64(double* %arg) { +define void @bcast_unfold_fma231_v4f64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_fma231_v4f64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -2001,13 +1999,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds double, double* %arg, i64 %tmp - %tmp3 = bitcast double* %tmp2 to <4 x double>* - %tmp4 = load <4 x double>, <4 x double>* %tmp3, align 8 + %tmp2 = getelementptr inbounds double, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <4 x double>, ptr %tmp3, align 8 %tmp5 = fmul contract <4 x double> %tmp4, %tmp6 = fadd contract <4 x double> %tmp4, %tmp5 - %tmp7 = bitcast double* %tmp2 to <4 x double>* - store <4 x double> %tmp6, <4 x double>* %tmp7, align 8 + %tmp7 = bitcast ptr %tmp2 to ptr + store <4 x double> %tmp6, ptr %tmp7, align 8 %tmp8 = add i64 %tmp, 4 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -2016,7 +2014,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_fma213_v8f64(double* %arg) { +define void @bcast_unfold_fma213_v8f64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_fma213_v8f64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -2037,13 +2035,13 @@ bb: bb2: ; preds = %bb2, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp9, %bb2 ] - %tmp3 = getelementptr inbounds double, double* %arg, i64 %tmp - %tmp4 = bitcast double* %tmp3 to <8 x double>* - %tmp5 = load <8 x double>, <8 x double>* %tmp4, align 8 + %tmp3 = getelementptr inbounds double, ptr %arg, i64 %tmp + %tmp4 = bitcast ptr %tmp3 to ptr + %tmp5 = load <8 x double>, ptr %tmp4, align 8 %tmp6 = fmul contract <8 x double> %tmp5, %tmp5 %tmp7 = fadd contract <8 x double> %tmp6, - %tmp8 = bitcast double* %tmp3 to <8 x double>* - store <8 x double> %tmp7, <8 x double>* %tmp8, align 8 + %tmp8 = bitcast ptr %tmp3 to ptr + store <8 x double> %tmp7, ptr %tmp8, align 8 %tmp9 = add i64 %tmp, 8 %tmp10 = icmp eq i64 %tmp9, 1024 br i1 %tmp10, label %bb11, label %bb2 @@ -2052,7 +2050,7 @@ bb11: ; preds = %bb2 ret void } -define void @bcast_unfold_fma231_v8f64(double* %arg) { +define void @bcast_unfold_fma231_v8f64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_fma231_v8f64: ; 
CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -2073,13 +2071,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds double, double* %arg, i64 %tmp - %tmp3 = bitcast double* %tmp2 to <8 x double>* - %tmp4 = load <8 x double>, <8 x double>* %tmp3, align 8 + %tmp2 = getelementptr inbounds double, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <8 x double>, ptr %tmp3, align 8 %tmp5 = fmul contract <8 x double> %tmp4, %tmp6 = fadd contract <8 x double> %tmp4, %tmp5 - %tmp7 = bitcast double* %tmp2 to <8 x double>* - store <8 x double> %tmp6, <8 x double>* %tmp7, align 8 + %tmp7 = bitcast ptr %tmp2 to ptr + store <8 x double> %tmp6, ptr %tmp7, align 8 %tmp8 = add i64 %tmp, 8 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -2088,7 +2086,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_fmax_v4f32(float* %arg) { +define void @bcast_unfold_fmax_v4f32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_fmax_v4f32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -2108,13 +2106,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds float, float* %arg, i64 %tmp - %tmp3 = bitcast float* %tmp2 to <4 x float>* - %tmp4 = load <4 x float>, <4 x float>* %tmp3, align 4 + %tmp2 = getelementptr inbounds float, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <4 x float>, ptr %tmp3, align 4 %tmp5 = fcmp ogt <4 x float> %tmp4, %tmp6 = select <4 x i1> %tmp5, <4 x float> %tmp4, <4 x float> - %tmp7 = bitcast float* %tmp2 to <4 x float>* - store <4 x float> %tmp6, <4 x float>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <4 x float> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 4 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -2123,7 +2121,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_fmax_v8f32(float* %arg) { +define void @bcast_unfold_fmax_v8f32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_fmax_v8f32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -2144,13 +2142,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds float, float* %arg, i64 %tmp - %tmp3 = bitcast float* %tmp2 to <8 x float>* - %tmp4 = load <8 x float>, <8 x float>* %tmp3, align 4 + %tmp2 = getelementptr inbounds float, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <8 x float>, ptr %tmp3, align 4 %tmp5 = fcmp ogt <8 x float> %tmp4, %tmp6 = select <8 x i1> %tmp5, <8 x float> %tmp4, <8 x float> - %tmp7 = bitcast float* %tmp2 to <8 x float>* - store <8 x float> %tmp6, <8 x float>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <8 x float> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 8 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -2159,7 +2157,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_fmax_v16f32(float* %arg) { +define void @bcast_unfold_fmax_v16f32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_fmax_v16f32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -2180,13 +2178,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds float, float* %arg, i64 %tmp - %tmp3 = bitcast float* %tmp2 to <16 x float>* - %tmp4 = load <16 x float>, <16 x float>* %tmp3, align 4 + %tmp2 = getelementptr inbounds float, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr 
%tmp2 to ptr + %tmp4 = load <16 x float>, ptr %tmp3, align 4 %tmp5 = fcmp ogt <16 x float> %tmp4, %tmp6 = select <16 x i1> %tmp5, <16 x float> %tmp4, <16 x float> - %tmp7 = bitcast float* %tmp2 to <16 x float>* - store <16 x float> %tmp6, <16 x float>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <16 x float> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 16 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -2195,7 +2193,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_fmax_v2f64(double* %arg) { +define void @bcast_unfold_fmax_v2f64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_fmax_v2f64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -2216,13 +2214,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds double, double* %arg, i64 %tmp - %tmp3 = bitcast double* %tmp2 to <2 x double>* - %tmp4 = load <2 x double>, <2 x double>* %tmp3, align 8 + %tmp2 = getelementptr inbounds double, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <2 x double>, ptr %tmp3, align 8 %tmp5 = fcmp ogt <2 x double> %tmp4, %tmp6 = select <2 x i1> %tmp5, <2 x double> %tmp4, <2 x double> - %tmp7 = bitcast double* %tmp2 to <2 x double>* - store <2 x double> %tmp6, <2 x double>* %tmp7, align 8 + %tmp7 = bitcast ptr %tmp2 to ptr + store <2 x double> %tmp6, ptr %tmp7, align 8 %tmp8 = add i64 %tmp, 2 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -2231,7 +2229,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_fmax_v4f64(double* %arg) { +define void @bcast_unfold_fmax_v4f64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_fmax_v4f64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -2252,13 +2250,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds double, double* %arg, i64 %tmp - %tmp3 = bitcast double* %tmp2 to <4 x double>* - %tmp4 = load <4 x double>, <4 x double>* %tmp3, align 8 + %tmp2 = getelementptr inbounds double, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <4 x double>, ptr %tmp3, align 8 %tmp5 = fcmp ogt <4 x double> %tmp4, %tmp6 = select <4 x i1> %tmp5, <4 x double> %tmp4, <4 x double> - %tmp7 = bitcast double* %tmp2 to <4 x double>* - store <4 x double> %tmp6, <4 x double>* %tmp7, align 8 + %tmp7 = bitcast ptr %tmp2 to ptr + store <4 x double> %tmp6, ptr %tmp7, align 8 %tmp8 = add i64 %tmp, 4 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -2267,7 +2265,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_fmax_v8f64(double* %arg) { +define void @bcast_unfold_fmax_v8f64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_fmax_v8f64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -2288,13 +2286,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds double, double* %arg, i64 %tmp - %tmp3 = bitcast double* %tmp2 to <8 x double>* - %tmp4 = load <8 x double>, <8 x double>* %tmp3, align 8 + %tmp2 = getelementptr inbounds double, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <8 x double>, ptr %tmp3, align 8 %tmp5 = fcmp ogt <8 x double> %tmp4, %tmp6 = select <8 x i1> %tmp5, <8 x double> %tmp4, <8 x double> - %tmp7 = bitcast double* %tmp2 to <8 x double>* - store <8 x double> %tmp6, <8 x double>* %tmp7, align 8 + %tmp7 = bitcast ptr %tmp2 to ptr + store <8 x double> %tmp6, ptr %tmp7, align 8 %tmp8 = add i64 
%tmp, 8 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -2303,7 +2301,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_fmin_v4f32(float* %arg) { +define void @bcast_unfold_fmin_v4f32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_fmin_v4f32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -2323,13 +2321,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds float, float* %arg, i64 %tmp - %tmp3 = bitcast float* %tmp2 to <4 x float>* - %tmp4 = load <4 x float>, <4 x float>* %tmp3, align 4 + %tmp2 = getelementptr inbounds float, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <4 x float>, ptr %tmp3, align 4 %tmp5 = fcmp olt <4 x float> %tmp4, %tmp6 = select <4 x i1> %tmp5, <4 x float> %tmp4, <4 x float> - %tmp7 = bitcast float* %tmp2 to <4 x float>* - store <4 x float> %tmp6, <4 x float>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <4 x float> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 4 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -2338,7 +2336,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_fmin_v8f32(float* %arg) { +define void @bcast_unfold_fmin_v8f32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_fmin_v8f32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -2359,13 +2357,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds float, float* %arg, i64 %tmp - %tmp3 = bitcast float* %tmp2 to <8 x float>* - %tmp4 = load <8 x float>, <8 x float>* %tmp3, align 4 + %tmp2 = getelementptr inbounds float, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <8 x float>, ptr %tmp3, align 4 %tmp5 = fcmp olt <8 x float> %tmp4, %tmp6 = select <8 x i1> %tmp5, <8 x float> %tmp4, <8 x float> - %tmp7 = bitcast float* %tmp2 to <8 x float>* - store <8 x float> %tmp6, <8 x float>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <8 x float> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 8 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -2374,7 +2372,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_fmin_v16f32(float* %arg) { +define void @bcast_unfold_fmin_v16f32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_fmin_v16f32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -2395,13 +2393,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds float, float* %arg, i64 %tmp - %tmp3 = bitcast float* %tmp2 to <16 x float>* - %tmp4 = load <16 x float>, <16 x float>* %tmp3, align 4 + %tmp2 = getelementptr inbounds float, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <16 x float>, ptr %tmp3, align 4 %tmp5 = fcmp olt <16 x float> %tmp4, %tmp6 = select <16 x i1> %tmp5, <16 x float> %tmp4, <16 x float> - %tmp7 = bitcast float* %tmp2 to <16 x float>* - store <16 x float> %tmp6, <16 x float>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <16 x float> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 16 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -2410,7 +2408,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_fmin_v2f64(double* %arg) { +define void @bcast_unfold_fmin_v2f64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_fmin_v2f64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -2431,13 +2429,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = 
phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds double, double* %arg, i64 %tmp - %tmp3 = bitcast double* %tmp2 to <2 x double>* - %tmp4 = load <2 x double>, <2 x double>* %tmp3, align 8 + %tmp2 = getelementptr inbounds double, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <2 x double>, ptr %tmp3, align 8 %tmp5 = fcmp olt <2 x double> %tmp4, %tmp6 = select <2 x i1> %tmp5, <2 x double> %tmp4, <2 x double> - %tmp7 = bitcast double* %tmp2 to <2 x double>* - store <2 x double> %tmp6, <2 x double>* %tmp7, align 8 + %tmp7 = bitcast ptr %tmp2 to ptr + store <2 x double> %tmp6, ptr %tmp7, align 8 %tmp8 = add i64 %tmp, 2 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -2446,7 +2444,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_fmin_v4f64(double* %arg) { +define void @bcast_unfold_fmin_v4f64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_fmin_v4f64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -2467,13 +2465,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds double, double* %arg, i64 %tmp - %tmp3 = bitcast double* %tmp2 to <4 x double>* - %tmp4 = load <4 x double>, <4 x double>* %tmp3, align 8 + %tmp2 = getelementptr inbounds double, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <4 x double>, ptr %tmp3, align 8 %tmp5 = fcmp olt <4 x double> %tmp4, %tmp6 = select <4 x i1> %tmp5, <4 x double> %tmp4, <4 x double> - %tmp7 = bitcast double* %tmp2 to <4 x double>* - store <4 x double> %tmp6, <4 x double>* %tmp7, align 8 + %tmp7 = bitcast ptr %tmp2 to ptr + store <4 x double> %tmp6, ptr %tmp7, align 8 %tmp8 = add i64 %tmp, 4 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -2482,7 +2480,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_fmin_v8f64(double* %arg) { +define void @bcast_unfold_fmin_v8f64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_fmin_v8f64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -2503,13 +2501,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds double, double* %arg, i64 %tmp - %tmp3 = bitcast double* %tmp2 to <8 x double>* - %tmp4 = load <8 x double>, <8 x double>* %tmp3, align 8 + %tmp2 = getelementptr inbounds double, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <8 x double>, ptr %tmp3, align 8 %tmp5 = fcmp olt <8 x double> %tmp4, %tmp6 = select <8 x i1> %tmp5, <8 x double> %tmp4, <8 x double> - %tmp7 = bitcast double* %tmp2 to <8 x double>* - store <8 x double> %tmp6, <8 x double>* %tmp7, align 8 + %tmp7 = bitcast ptr %tmp2 to ptr + store <8 x double> %tmp6, ptr %tmp7, align 8 %tmp8 = add i64 %tmp, 8 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -2518,7 +2516,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_smin_v4i32(i32* %arg) { +define void @bcast_unfold_smin_v4i32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_smin_v4i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -2537,13 +2535,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i32, i32* %arg, i64 %tmp - %tmp3 = bitcast i32* %tmp2 to <4 x i32>* - %tmp4 = load <4 x i32>, <4 x i32>* %tmp3, align 4 + %tmp2 = getelementptr inbounds i32, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <4 x i32>, ptr %tmp3, align 4 %tmp5 = icmp slt <4 x i32> %tmp4, %tmp6 = select 
<4 x i1> %tmp5, <4 x i32> %tmp4, <4 x i32> - %tmp7 = bitcast i32* %tmp2 to <4 x i32>* - store <4 x i32> %tmp6, <4 x i32>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <4 x i32> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 4 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -2552,7 +2550,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_smin_v8i32(i32* %arg) { +define void @bcast_unfold_smin_v8i32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_smin_v8i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -2572,13 +2570,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i32, i32* %arg, i64 %tmp - %tmp3 = bitcast i32* %tmp2 to <8 x i32>* - %tmp4 = load <8 x i32>, <8 x i32>* %tmp3, align 4 + %tmp2 = getelementptr inbounds i32, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <8 x i32>, ptr %tmp3, align 4 %tmp5 = icmp slt <8 x i32> %tmp4, %tmp6 = select <8 x i1> %tmp5, <8 x i32> %tmp4, <8 x i32> - %tmp7 = bitcast i32* %tmp2 to <8 x i32>* - store <8 x i32> %tmp6, <8 x i32>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <8 x i32> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 8 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -2587,7 +2585,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_smin_v16i32(i32* %arg) { +define void @bcast_unfold_smin_v16i32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_smin_v16i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -2607,13 +2605,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i32, i32* %arg, i64 %tmp - %tmp3 = bitcast i32* %tmp2 to <16 x i32>* - %tmp4 = load <16 x i32>, <16 x i32>* %tmp3, align 4 + %tmp2 = getelementptr inbounds i32, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <16 x i32>, ptr %tmp3, align 4 %tmp5 = icmp slt <16 x i32> %tmp4, %tmp6 = select <16 x i1> %tmp5, <16 x i32> %tmp4, <16 x i32> - %tmp7 = bitcast i32* %tmp2 to <16 x i32>* - store <16 x i32> %tmp6, <16 x i32>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <16 x i32> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 16 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -2622,7 +2620,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_smin_v2i64(i64* %arg) { +define void @bcast_unfold_smin_v2i64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_smin_v2i64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -2641,13 +2639,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i64, i64* %arg, i64 %tmp - %tmp3 = bitcast i64* %tmp2 to <2 x i64>* - %tmp4 = load <2 x i64>, <2 x i64>* %tmp3, align 8 + %tmp2 = getelementptr inbounds i64, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <2 x i64>, ptr %tmp3, align 8 %tmp5 = icmp slt <2 x i64> %tmp4, %tmp6 = select <2 x i1> %tmp5, <2 x i64> %tmp4, <2 x i64> - %tmp7 = bitcast i64* %tmp2 to <2 x i64>* - store <2 x i64> %tmp6, <2 x i64>* %tmp7, align 8 + %tmp7 = bitcast ptr %tmp2 to ptr + store <2 x i64> %tmp6, ptr %tmp7, align 8 %tmp8 = add i64 %tmp, 2 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -2656,7 +2654,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_smin_v4i64(i64* %arg) { +define void @bcast_unfold_smin_v4i64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_smin_v4i64: ; 
CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -2676,13 +2674,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i64, i64* %arg, i64 %tmp - %tmp3 = bitcast i64* %tmp2 to <4 x i64>* - %tmp4 = load <4 x i64>, <4 x i64>* %tmp3, align 8 + %tmp2 = getelementptr inbounds i64, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <4 x i64>, ptr %tmp3, align 8 %tmp5 = icmp slt <4 x i64> %tmp4, %tmp6 = select <4 x i1> %tmp5, <4 x i64> %tmp4, <4 x i64> - %tmp7 = bitcast i64* %tmp2 to <4 x i64>* - store <4 x i64> %tmp6, <4 x i64>* %tmp7, align 8 + %tmp7 = bitcast ptr %tmp2 to ptr + store <4 x i64> %tmp6, ptr %tmp7, align 8 %tmp8 = add i64 %tmp, 4 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -2691,7 +2689,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_smin_v8i64(i64* %arg) { +define void @bcast_unfold_smin_v8i64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_smin_v8i64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -2711,13 +2709,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i64, i64* %arg, i64 %tmp - %tmp3 = bitcast i64* %tmp2 to <8 x i64>* - %tmp4 = load <8 x i64>, <8 x i64>* %tmp3, align 8 + %tmp2 = getelementptr inbounds i64, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <8 x i64>, ptr %tmp3, align 8 %tmp5 = icmp slt <8 x i64> %tmp4, %tmp6 = select <8 x i1> %tmp5, <8 x i64> %tmp4, <8 x i64> - %tmp7 = bitcast i64* %tmp2 to <8 x i64>* - store <8 x i64> %tmp6, <8 x i64>* %tmp7, align 8 + %tmp7 = bitcast ptr %tmp2 to ptr + store <8 x i64> %tmp6, ptr %tmp7, align 8 %tmp8 = add i64 %tmp, 8 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -2726,7 +2724,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_smax_v4i32(i32* %arg) { +define void @bcast_unfold_smax_v4i32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_smax_v4i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -2745,13 +2743,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i32, i32* %arg, i64 %tmp - %tmp3 = bitcast i32* %tmp2 to <4 x i32>* - %tmp4 = load <4 x i32>, <4 x i32>* %tmp3, align 4 + %tmp2 = getelementptr inbounds i32, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <4 x i32>, ptr %tmp3, align 4 %tmp5 = icmp sgt <4 x i32> %tmp4, %tmp6 = select <4 x i1> %tmp5, <4 x i32> %tmp4, <4 x i32> - %tmp7 = bitcast i32* %tmp2 to <4 x i32>* - store <4 x i32> %tmp6, <4 x i32>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <4 x i32> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 4 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -2760,7 +2758,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_smax_v8i32(i32* %arg) { +define void @bcast_unfold_smax_v8i32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_smax_v8i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -2780,13 +2778,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i32, i32* %arg, i64 %tmp - %tmp3 = bitcast i32* %tmp2 to <8 x i32>* - %tmp4 = load <8 x i32>, <8 x i32>* %tmp3, align 4 + %tmp2 = getelementptr inbounds i32, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <8 x i32>, ptr %tmp3, align 4 %tmp5 = icmp sgt <8 x i32> %tmp4, %tmp6 = select <8 x i1> %tmp5, <8 x 
i32> %tmp4, <8 x i32> - %tmp7 = bitcast i32* %tmp2 to <8 x i32>* - store <8 x i32> %tmp6, <8 x i32>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <8 x i32> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 8 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -2795,7 +2793,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_smax_v16i32(i32* %arg) { +define void @bcast_unfold_smax_v16i32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_smax_v16i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -2815,13 +2813,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i32, i32* %arg, i64 %tmp - %tmp3 = bitcast i32* %tmp2 to <16 x i32>* - %tmp4 = load <16 x i32>, <16 x i32>* %tmp3, align 4 + %tmp2 = getelementptr inbounds i32, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <16 x i32>, ptr %tmp3, align 4 %tmp5 = icmp sgt <16 x i32> %tmp4, %tmp6 = select <16 x i1> %tmp5, <16 x i32> %tmp4, <16 x i32> - %tmp7 = bitcast i32* %tmp2 to <16 x i32>* - store <16 x i32> %tmp6, <16 x i32>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <16 x i32> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 16 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -2830,7 +2828,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_smax_v2i64(i64* %arg) { +define void @bcast_unfold_smax_v2i64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_smax_v2i64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -2849,13 +2847,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i64, i64* %arg, i64 %tmp - %tmp3 = bitcast i64* %tmp2 to <2 x i64>* - %tmp4 = load <2 x i64>, <2 x i64>* %tmp3, align 8 + %tmp2 = getelementptr inbounds i64, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <2 x i64>, ptr %tmp3, align 8 %tmp5 = icmp sgt <2 x i64> %tmp4, %tmp6 = select <2 x i1> %tmp5, <2 x i64> %tmp4, <2 x i64> - %tmp7 = bitcast i64* %tmp2 to <2 x i64>* - store <2 x i64> %tmp6, <2 x i64>* %tmp7, align 8 + %tmp7 = bitcast ptr %tmp2 to ptr + store <2 x i64> %tmp6, ptr %tmp7, align 8 %tmp8 = add i64 %tmp, 2 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -2864,7 +2862,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_smax_v4i64(i64* %arg) { +define void @bcast_unfold_smax_v4i64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_smax_v4i64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -2884,13 +2882,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i64, i64* %arg, i64 %tmp - %tmp3 = bitcast i64* %tmp2 to <4 x i64>* - %tmp4 = load <4 x i64>, <4 x i64>* %tmp3, align 8 + %tmp2 = getelementptr inbounds i64, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <4 x i64>, ptr %tmp3, align 8 %tmp5 = icmp sgt <4 x i64> %tmp4, %tmp6 = select <4 x i1> %tmp5, <4 x i64> %tmp4, <4 x i64> - %tmp7 = bitcast i64* %tmp2 to <4 x i64>* - store <4 x i64> %tmp6, <4 x i64>* %tmp7, align 8 + %tmp7 = bitcast ptr %tmp2 to ptr + store <4 x i64> %tmp6, ptr %tmp7, align 8 %tmp8 = add i64 %tmp, 4 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -2899,7 +2897,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_smax_v8i64(i64* %arg) { +define void @bcast_unfold_smax_v8i64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_smax_v8i64: ; CHECK: # %bb.0: # %bb ; 
CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -2919,13 +2917,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i64, i64* %arg, i64 %tmp - %tmp3 = bitcast i64* %tmp2 to <8 x i64>* - %tmp4 = load <8 x i64>, <8 x i64>* %tmp3, align 8 + %tmp2 = getelementptr inbounds i64, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <8 x i64>, ptr %tmp3, align 8 %tmp5 = icmp sgt <8 x i64> %tmp4, %tmp6 = select <8 x i1> %tmp5, <8 x i64> %tmp4, <8 x i64> - %tmp7 = bitcast i64* %tmp2 to <8 x i64>* - store <8 x i64> %tmp6, <8 x i64>* %tmp7, align 8 + %tmp7 = bitcast ptr %tmp2 to ptr + store <8 x i64> %tmp6, ptr %tmp7, align 8 %tmp8 = add i64 %tmp, 8 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -2934,7 +2932,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_umin_v4i32(i32* %arg) { +define void @bcast_unfold_umin_v4i32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_umin_v4i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -2953,13 +2951,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i32, i32* %arg, i64 %tmp - %tmp3 = bitcast i32* %tmp2 to <4 x i32>* - %tmp4 = load <4 x i32>, <4 x i32>* %tmp3, align 4 + %tmp2 = getelementptr inbounds i32, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <4 x i32>, ptr %tmp3, align 4 %tmp5 = icmp ult <4 x i32> %tmp4, %tmp6 = select <4 x i1> %tmp5, <4 x i32> %tmp4, <4 x i32> - %tmp7 = bitcast i32* %tmp2 to <4 x i32>* - store <4 x i32> %tmp6, <4 x i32>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <4 x i32> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 4 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -2968,7 +2966,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_umin_v8i32(i32* %arg) { +define void @bcast_unfold_umin_v8i32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_umin_v8i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -2988,13 +2986,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i32, i32* %arg, i64 %tmp - %tmp3 = bitcast i32* %tmp2 to <8 x i32>* - %tmp4 = load <8 x i32>, <8 x i32>* %tmp3, align 4 + %tmp2 = getelementptr inbounds i32, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <8 x i32>, ptr %tmp3, align 4 %tmp5 = icmp ult <8 x i32> %tmp4, %tmp6 = select <8 x i1> %tmp5, <8 x i32> %tmp4, <8 x i32> - %tmp7 = bitcast i32* %tmp2 to <8 x i32>* - store <8 x i32> %tmp6, <8 x i32>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <8 x i32> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 8 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -3003,7 +3001,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_umin_v16i32(i32* %arg) { +define void @bcast_unfold_umin_v16i32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_umin_v16i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -3023,13 +3021,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i32, i32* %arg, i64 %tmp - %tmp3 = bitcast i32* %tmp2 to <16 x i32>* - %tmp4 = load <16 x i32>, <16 x i32>* %tmp3, align 4 + %tmp2 = getelementptr inbounds i32, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <16 x i32>, ptr %tmp3, align 4 %tmp5 = icmp ult <16 x i32> %tmp4, %tmp6 = select <16 x i1> %tmp5, <16 x i32> %tmp4, 
<16 x i32> - %tmp7 = bitcast i32* %tmp2 to <16 x i32>* - store <16 x i32> %tmp6, <16 x i32>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <16 x i32> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 16 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -3038,7 +3036,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_umin_v2i64(i64* %arg) { +define void @bcast_unfold_umin_v2i64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_umin_v2i64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -3057,13 +3055,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i64, i64* %arg, i64 %tmp - %tmp3 = bitcast i64* %tmp2 to <2 x i64>* - %tmp4 = load <2 x i64>, <2 x i64>* %tmp3, align 8 + %tmp2 = getelementptr inbounds i64, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <2 x i64>, ptr %tmp3, align 8 %tmp5 = icmp ult <2 x i64> %tmp4, %tmp6 = select <2 x i1> %tmp5, <2 x i64> %tmp4, <2 x i64> - %tmp7 = bitcast i64* %tmp2 to <2 x i64>* - store <2 x i64> %tmp6, <2 x i64>* %tmp7, align 8 + %tmp7 = bitcast ptr %tmp2 to ptr + store <2 x i64> %tmp6, ptr %tmp7, align 8 %tmp8 = add i64 %tmp, 2 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -3072,7 +3070,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_umin_v4i64(i64* %arg) { +define void @bcast_unfold_umin_v4i64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_umin_v4i64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -3092,13 +3090,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i64, i64* %arg, i64 %tmp - %tmp3 = bitcast i64* %tmp2 to <4 x i64>* - %tmp4 = load <4 x i64>, <4 x i64>* %tmp3, align 8 + %tmp2 = getelementptr inbounds i64, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <4 x i64>, ptr %tmp3, align 8 %tmp5 = icmp ult <4 x i64> %tmp4, %tmp6 = select <4 x i1> %tmp5, <4 x i64> %tmp4, <4 x i64> - %tmp7 = bitcast i64* %tmp2 to <4 x i64>* - store <4 x i64> %tmp6, <4 x i64>* %tmp7, align 8 + %tmp7 = bitcast ptr %tmp2 to ptr + store <4 x i64> %tmp6, ptr %tmp7, align 8 %tmp8 = add i64 %tmp, 4 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -3107,7 +3105,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_umin_v8i64(i64* %arg) { +define void @bcast_unfold_umin_v8i64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_umin_v8i64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -3127,13 +3125,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i64, i64* %arg, i64 %tmp - %tmp3 = bitcast i64* %tmp2 to <8 x i64>* - %tmp4 = load <8 x i64>, <8 x i64>* %tmp3, align 8 + %tmp2 = getelementptr inbounds i64, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <8 x i64>, ptr %tmp3, align 8 %tmp5 = icmp ult <8 x i64> %tmp4, %tmp6 = select <8 x i1> %tmp5, <8 x i64> %tmp4, <8 x i64> - %tmp7 = bitcast i64* %tmp2 to <8 x i64>* - store <8 x i64> %tmp6, <8 x i64>* %tmp7, align 8 + %tmp7 = bitcast ptr %tmp2 to ptr + store <8 x i64> %tmp6, ptr %tmp7, align 8 %tmp8 = add i64 %tmp, 8 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -3142,7 +3140,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_umax_v4i32(i32* %arg) { +define void @bcast_unfold_umax_v4i32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_umax_v4i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq 
$-4096, %rax # imm = 0xF000 @@ -3161,13 +3159,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i32, i32* %arg, i64 %tmp - %tmp3 = bitcast i32* %tmp2 to <4 x i32>* - %tmp4 = load <4 x i32>, <4 x i32>* %tmp3, align 4 + %tmp2 = getelementptr inbounds i32, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <4 x i32>, ptr %tmp3, align 4 %tmp5 = icmp ugt <4 x i32> %tmp4, %tmp6 = select <4 x i1> %tmp5, <4 x i32> %tmp4, <4 x i32> - %tmp7 = bitcast i32* %tmp2 to <4 x i32>* - store <4 x i32> %tmp6, <4 x i32>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <4 x i32> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 4 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -3176,7 +3174,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_umax_v8i32(i32* %arg) { +define void @bcast_unfold_umax_v8i32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_umax_v8i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -3196,13 +3194,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i32, i32* %arg, i64 %tmp - %tmp3 = bitcast i32* %tmp2 to <8 x i32>* - %tmp4 = load <8 x i32>, <8 x i32>* %tmp3, align 4 + %tmp2 = getelementptr inbounds i32, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <8 x i32>, ptr %tmp3, align 4 %tmp5 = icmp ugt <8 x i32> %tmp4, %tmp6 = select <8 x i1> %tmp5, <8 x i32> %tmp4, <8 x i32> - %tmp7 = bitcast i32* %tmp2 to <8 x i32>* - store <8 x i32> %tmp6, <8 x i32>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <8 x i32> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 8 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -3211,7 +3209,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_umax_v16i32(i32* %arg) { +define void @bcast_unfold_umax_v16i32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_umax_v16i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -3231,13 +3229,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i32, i32* %arg, i64 %tmp - %tmp3 = bitcast i32* %tmp2 to <16 x i32>* - %tmp4 = load <16 x i32>, <16 x i32>* %tmp3, align 4 + %tmp2 = getelementptr inbounds i32, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <16 x i32>, ptr %tmp3, align 4 %tmp5 = icmp ugt <16 x i32> %tmp4, %tmp6 = select <16 x i1> %tmp5, <16 x i32> %tmp4, <16 x i32> - %tmp7 = bitcast i32* %tmp2 to <16 x i32>* - store <16 x i32> %tmp6, <16 x i32>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <16 x i32> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 16 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -3246,7 +3244,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_umax_v2i64(i64* %arg) { +define void @bcast_unfold_umax_v2i64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_umax_v2i64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -3265,13 +3263,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i64, i64* %arg, i64 %tmp - %tmp3 = bitcast i64* %tmp2 to <2 x i64>* - %tmp4 = load <2 x i64>, <2 x i64>* %tmp3, align 8 + %tmp2 = getelementptr inbounds i64, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <2 x i64>, ptr %tmp3, align 8 %tmp5 = icmp ugt <2 x i64> %tmp4, %tmp6 = select <2 x i1> %tmp5, <2 x i64> %tmp4, <2 x i64> - 
%tmp7 = bitcast i64* %tmp2 to <2 x i64>* - store <2 x i64> %tmp6, <2 x i64>* %tmp7, align 8 + %tmp7 = bitcast ptr %tmp2 to ptr + store <2 x i64> %tmp6, ptr %tmp7, align 8 %tmp8 = add i64 %tmp, 2 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -3280,7 +3278,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_umax_v4i64(i64* %arg) { +define void @bcast_unfold_umax_v4i64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_umax_v4i64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -3300,13 +3298,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i64, i64* %arg, i64 %tmp - %tmp3 = bitcast i64* %tmp2 to <4 x i64>* - %tmp4 = load <4 x i64>, <4 x i64>* %tmp3, align 8 + %tmp2 = getelementptr inbounds i64, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <4 x i64>, ptr %tmp3, align 8 %tmp5 = icmp ugt <4 x i64> %tmp4, %tmp6 = select <4 x i1> %tmp5, <4 x i64> %tmp4, <4 x i64> - %tmp7 = bitcast i64* %tmp2 to <4 x i64>* - store <4 x i64> %tmp6, <4 x i64>* %tmp7, align 8 + %tmp7 = bitcast ptr %tmp2 to ptr + store <4 x i64> %tmp6, ptr %tmp7, align 8 %tmp8 = add i64 %tmp, 4 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -3315,7 +3313,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_umax_v8i64(i64* %arg) { +define void @bcast_unfold_umax_v8i64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_umax_v8i64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -3335,13 +3333,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i64, i64* %arg, i64 %tmp - %tmp3 = bitcast i64* %tmp2 to <8 x i64>* - %tmp4 = load <8 x i64>, <8 x i64>* %tmp3, align 8 + %tmp2 = getelementptr inbounds i64, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <8 x i64>, ptr %tmp3, align 8 %tmp5 = icmp ugt <8 x i64> %tmp4, %tmp6 = select <8 x i1> %tmp5, <8 x i64> %tmp4, <8 x i64> - %tmp7 = bitcast i64* %tmp2 to <8 x i64>* - store <8 x i64> %tmp6, <8 x i64>* %tmp7, align 8 + %tmp7 = bitcast ptr %tmp2 to ptr + store <8 x i64> %tmp6, ptr %tmp7, align 8 %tmp8 = add i64 %tmp, 8 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -3350,7 +3348,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_pcmpgt_v4i32(i32* %arg) { +define void @bcast_unfold_pcmpgt_v4i32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_pcmpgt_v4i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -3371,13 +3369,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i32, i32* %arg, i64 %tmp - %tmp3 = bitcast i32* %tmp2 to <4 x i32>* - %tmp4 = load <4 x i32>, <4 x i32>* %tmp3, align 4 + %tmp2 = getelementptr inbounds i32, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <4 x i32>, ptr %tmp3, align 4 %tmp5 = icmp sgt <4 x i32> %tmp4, %tmp6 = select <4 x i1> %tmp5, <4 x i32> , <4 x i32> %tmp4 - %tmp7 = bitcast i32* %tmp2 to <4 x i32>* - store <4 x i32> %tmp6, <4 x i32>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <4 x i32> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 4 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -3386,7 +3384,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_pcmpgt_v8i32(i32* %arg) { +define void @bcast_unfold_pcmpgt_v8i32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_pcmpgt_v8i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, 
%rax # imm = 0xF000 @@ -3408,13 +3406,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i32, i32* %arg, i64 %tmp - %tmp3 = bitcast i32* %tmp2 to <8 x i32>* - %tmp4 = load <8 x i32>, <8 x i32>* %tmp3, align 4 + %tmp2 = getelementptr inbounds i32, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <8 x i32>, ptr %tmp3, align 4 %tmp5 = icmp sgt <8 x i32> %tmp4, %tmp6 = select <8 x i1> %tmp5, <8 x i32> , <8 x i32> %tmp4 - %tmp7 = bitcast i32* %tmp2 to <8 x i32>* - store <8 x i32> %tmp6, <8 x i32>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <8 x i32> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 8 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -3423,7 +3421,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_pcmpgt_v16i32(i32* %arg) { +define void @bcast_unfold_pcmpgt_v16i32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_pcmpgt_v16i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -3445,13 +3443,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i32, i32* %arg, i64 %tmp - %tmp3 = bitcast i32* %tmp2 to <16 x i32>* - %tmp4 = load <16 x i32>, <16 x i32>* %tmp3, align 4 + %tmp2 = getelementptr inbounds i32, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <16 x i32>, ptr %tmp3, align 4 %tmp5 = icmp sgt <16 x i32> %tmp4, %tmp6 = select <16 x i1> %tmp5, <16 x i32> , <16 x i32> %tmp4 - %tmp7 = bitcast i32* %tmp2 to <16 x i32>* - store <16 x i32> %tmp6, <16 x i32>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <16 x i32> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 16 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -3460,7 +3458,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_pcmpgt_v2i64(i64* %arg) { +define void @bcast_unfold_pcmpgt_v2i64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_pcmpgt_v2i64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -3481,13 +3479,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i64, i64* %arg, i64 %tmp - %tmp3 = bitcast i64* %tmp2 to <2 x i64>* - %tmp4 = load <2 x i64>, <2 x i64>* %tmp3, align 4 + %tmp2 = getelementptr inbounds i64, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <2 x i64>, ptr %tmp3, align 4 %tmp5 = icmp sgt <2 x i64> %tmp4, %tmp6 = select <2 x i1> %tmp5, <2 x i64> , <2 x i64> %tmp4 - %tmp7 = bitcast i64* %tmp2 to <2 x i64>* - store <2 x i64> %tmp6, <2 x i64>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <2 x i64> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 2 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -3495,7 +3493,7 @@ bb1: ; preds = %bb1, %bb bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_pcmpgt_v4i64(i64* %arg) { +define void @bcast_unfold_pcmpgt_v4i64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_pcmpgt_v4i64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -3517,13 +3515,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i64, i64* %arg, i64 %tmp - %tmp3 = bitcast i64* %tmp2 to <4 x i64>* - %tmp4 = load <4 x i64>, <4 x i64>* %tmp3, align 4 + %tmp2 = getelementptr inbounds i64, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <4 x i64>, ptr %tmp3, align 4 %tmp5 = icmp sgt <4 x i64> %tmp4, %tmp6 = select <4 x 
i1> %tmp5, <4 x i64> , <4 x i64> %tmp4 - %tmp7 = bitcast i64* %tmp2 to <4 x i64>* - store <4 x i64> %tmp6, <4 x i64>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <4 x i64> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 4 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -3532,7 +3530,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_pcmpgt_v8i64(i64* %arg) { +define void @bcast_unfold_pcmpgt_v8i64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_pcmpgt_v8i64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -3554,13 +3552,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i64, i64* %arg, i64 %tmp - %tmp3 = bitcast i64* %tmp2 to <8 x i64>* - %tmp4 = load <8 x i64>, <8 x i64>* %tmp3, align 4 + %tmp2 = getelementptr inbounds i64, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <8 x i64>, ptr %tmp3, align 4 %tmp5 = icmp sgt <8 x i64> %tmp4, %tmp6 = select <8 x i1> %tmp5, <8 x i64> , <8 x i64> %tmp4 - %tmp7 = bitcast i64* %tmp2 to <8 x i64>* - store <8 x i64> %tmp6, <8 x i64>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <8 x i64> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 8 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -3569,7 +3567,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_pcmpeq_v4i32(i32* %arg) { +define void @bcast_unfold_pcmpeq_v4i32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_pcmpeq_v4i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -3590,13 +3588,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i32, i32* %arg, i64 %tmp - %tmp3 = bitcast i32* %tmp2 to <4 x i32>* - %tmp4 = load <4 x i32>, <4 x i32>* %tmp3, align 4 + %tmp2 = getelementptr inbounds i32, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <4 x i32>, ptr %tmp3, align 4 %tmp5 = icmp eq <4 x i32> %tmp4, %tmp6 = select <4 x i1> %tmp5, <4 x i32> , <4 x i32> %tmp4 - %tmp7 = bitcast i32* %tmp2 to <4 x i32>* - store <4 x i32> %tmp6, <4 x i32>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <4 x i32> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 4 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -3605,7 +3603,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_pcmpeq_v8i32(i32* %arg) { +define void @bcast_unfold_pcmpeq_v8i32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_pcmpeq_v8i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -3627,13 +3625,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i32, i32* %arg, i64 %tmp - %tmp3 = bitcast i32* %tmp2 to <8 x i32>* - %tmp4 = load <8 x i32>, <8 x i32>* %tmp3, align 4 + %tmp2 = getelementptr inbounds i32, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <8 x i32>, ptr %tmp3, align 4 %tmp5 = icmp eq <8 x i32> %tmp4, %tmp6 = select <8 x i1> %tmp5, <8 x i32> , <8 x i32> %tmp4 - %tmp7 = bitcast i32* %tmp2 to <8 x i32>* - store <8 x i32> %tmp6, <8 x i32>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <8 x i32> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 8 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -3642,7 +3640,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_pcmpeq_v16i32(i32* %arg) { +define void @bcast_unfold_pcmpeq_v16i32(ptr %arg) { ; CHECK-LABEL: 
bcast_unfold_pcmpeq_v16i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -3664,13 +3662,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i32, i32* %arg, i64 %tmp - %tmp3 = bitcast i32* %tmp2 to <16 x i32>* - %tmp4 = load <16 x i32>, <16 x i32>* %tmp3, align 4 + %tmp2 = getelementptr inbounds i32, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <16 x i32>, ptr %tmp3, align 4 %tmp5 = icmp eq <16 x i32> %tmp4, %tmp6 = select <16 x i1> %tmp5, <16 x i32> , <16 x i32> %tmp4 - %tmp7 = bitcast i32* %tmp2 to <16 x i32>* - store <16 x i32> %tmp6, <16 x i32>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <16 x i32> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 16 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -3679,7 +3677,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_pcmpeq_v2i64(i64* %arg) { +define void @bcast_unfold_pcmpeq_v2i64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_pcmpeq_v2i64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -3700,13 +3698,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i64, i64* %arg, i64 %tmp - %tmp3 = bitcast i64* %tmp2 to <2 x i64>* - %tmp4 = load <2 x i64>, <2 x i64>* %tmp3, align 4 + %tmp2 = getelementptr inbounds i64, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <2 x i64>, ptr %tmp3, align 4 %tmp5 = icmp eq <2 x i64> %tmp4, %tmp6 = select <2 x i1> %tmp5, <2 x i64> , <2 x i64> %tmp4 - %tmp7 = bitcast i64* %tmp2 to <2 x i64>* - store <2 x i64> %tmp6, <2 x i64>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <2 x i64> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 2 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -3714,7 +3712,7 @@ bb1: ; preds = %bb1, %bb bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_pcmpeq_v4i64(i64* %arg) { +define void @bcast_unfold_pcmpeq_v4i64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_pcmpeq_v4i64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -3736,13 +3734,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i64, i64* %arg, i64 %tmp - %tmp3 = bitcast i64* %tmp2 to <4 x i64>* - %tmp4 = load <4 x i64>, <4 x i64>* %tmp3, align 4 + %tmp2 = getelementptr inbounds i64, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <4 x i64>, ptr %tmp3, align 4 %tmp5 = icmp eq <4 x i64> %tmp4, %tmp6 = select <4 x i1> %tmp5, <4 x i64> , <4 x i64> %tmp4 - %tmp7 = bitcast i64* %tmp2 to <4 x i64>* - store <4 x i64> %tmp6, <4 x i64>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <4 x i64> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 4 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -3751,7 +3749,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_pcmpeq_v8i64(i64* %arg) { +define void @bcast_unfold_pcmpeq_v8i64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_pcmpeq_v8i64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -3773,13 +3771,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i64, i64* %arg, i64 %tmp - %tmp3 = bitcast i64* %tmp2 to <8 x i64>* - %tmp4 = load <8 x i64>, <8 x i64>* %tmp3, align 4 + %tmp2 = getelementptr inbounds i64, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <8 x i64>, 
ptr %tmp3, align 4 %tmp5 = icmp eq <8 x i64> %tmp4, %tmp6 = select <8 x i1> %tmp5, <8 x i64> , <8 x i64> %tmp4 - %tmp7 = bitcast i64* %tmp2 to <8 x i64>* - store <8 x i64> %tmp6, <8 x i64>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <8 x i64> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 8 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -3788,7 +3786,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_pcmp_v4i32(i32* %arg) { +define void @bcast_unfold_pcmp_v4i32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_pcmp_v4i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: xorl %eax, %eax @@ -3810,13 +3808,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i32, i32* %arg, i64 %tmp - %tmp3 = bitcast i32* %tmp2 to <4 x i32>* - %tmp4 = load <4 x i32>, <4 x i32>* %tmp3, align 4 + %tmp2 = getelementptr inbounds i32, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <4 x i32>, ptr %tmp3, align 4 %tmp5 = icmp slt <4 x i32> %tmp4, %tmp6 = select <4 x i1> %tmp5, <4 x i32> , <4 x i32> %tmp4 - %tmp7 = bitcast i32* %tmp2 to <4 x i32>* - store <4 x i32> %tmp6, <4 x i32>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <4 x i32> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 4 %tmp9 = icmp slt i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -3825,7 +3823,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_pcmp_v8i32(i32* %arg) { +define void @bcast_unfold_pcmp_v8i32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_pcmp_v8i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: xorl %eax, %eax @@ -3848,13 +3846,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i32, i32* %arg, i64 %tmp - %tmp3 = bitcast i32* %tmp2 to <8 x i32>* - %tmp4 = load <8 x i32>, <8 x i32>* %tmp3, align 4 + %tmp2 = getelementptr inbounds i32, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <8 x i32>, ptr %tmp3, align 4 %tmp5 = icmp slt <8 x i32> %tmp4, %tmp6 = select <8 x i1> %tmp5, <8 x i32> , <8 x i32> %tmp4 - %tmp7 = bitcast i32* %tmp2 to <8 x i32>* - store <8 x i32> %tmp6, <8 x i32>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <8 x i32> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 8 %tmp9 = icmp slt i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -3863,7 +3861,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_pcmp_v16i32(i32* %arg) { +define void @bcast_unfold_pcmp_v16i32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_pcmp_v16i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: xorl %eax, %eax @@ -3886,13 +3884,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i32, i32* %arg, i64 %tmp - %tmp3 = bitcast i32* %tmp2 to <16 x i32>* - %tmp4 = load <16 x i32>, <16 x i32>* %tmp3, align 4 + %tmp2 = getelementptr inbounds i32, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <16 x i32>, ptr %tmp3, align 4 %tmp5 = icmp slt <16 x i32> %tmp4, %tmp6 = select <16 x i1> %tmp5, <16 x i32> , <16 x i32> %tmp4 - %tmp7 = bitcast i32* %tmp2 to <16 x i32>* - store <16 x i32> %tmp6, <16 x i32>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <16 x i32> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 16 %tmp9 = icmp slt i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -3901,7 +3899,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_pcmp_v2i64(i64* %arg) { +define void @bcast_unfold_pcmp_v2i64(ptr %arg) { ; CHECK-LABEL: 
bcast_unfold_pcmp_v2i64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: xorl %eax, %eax @@ -3923,13 +3921,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i64, i64* %arg, i64 %tmp - %tmp3 = bitcast i64* %tmp2 to <2 x i64>* - %tmp4 = load <2 x i64>, <2 x i64>* %tmp3, align 4 + %tmp2 = getelementptr inbounds i64, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <2 x i64>, ptr %tmp3, align 4 %tmp5 = icmp slt <2 x i64> %tmp4, %tmp6 = select <2 x i1> %tmp5, <2 x i64> , <2 x i64> %tmp4 - %tmp7 = bitcast i64* %tmp2 to <2 x i64>* - store <2 x i64> %tmp6, <2 x i64>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <2 x i64> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 2 %tmp9 = icmp slt i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -3937,7 +3935,7 @@ bb1: ; preds = %bb1, %bb bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_pcmp_v4i64(i64* %arg) { +define void @bcast_unfold_pcmp_v4i64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_pcmp_v4i64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: xorl %eax, %eax @@ -3960,13 +3958,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i64, i64* %arg, i64 %tmp - %tmp3 = bitcast i64* %tmp2 to <4 x i64>* - %tmp4 = load <4 x i64>, <4 x i64>* %tmp3, align 4 + %tmp2 = getelementptr inbounds i64, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <4 x i64>, ptr %tmp3, align 4 %tmp5 = icmp slt <4 x i64> %tmp4, %tmp6 = select <4 x i1> %tmp5, <4 x i64> , <4 x i64> %tmp4 - %tmp7 = bitcast i64* %tmp2 to <4 x i64>* - store <4 x i64> %tmp6, <4 x i64>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <4 x i64> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 4 %tmp9 = icmp slt i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -3975,7 +3973,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_pcmp_v8i64(i64* %arg) { +define void @bcast_unfold_pcmp_v8i64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_pcmp_v8i64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: xorl %eax, %eax @@ -3998,13 +3996,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i64, i64* %arg, i64 %tmp - %tmp3 = bitcast i64* %tmp2 to <8 x i64>* - %tmp4 = load <8 x i64>, <8 x i64>* %tmp3, align 4 + %tmp2 = getelementptr inbounds i64, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <8 x i64>, ptr %tmp3, align 4 %tmp5 = icmp slt <8 x i64> %tmp4, %tmp6 = select <8 x i1> %tmp5, <8 x i64> , <8 x i64> %tmp4 - %tmp7 = bitcast i64* %tmp2 to <8 x i64>* - store <8 x i64> %tmp6, <8 x i64>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <8 x i64> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 8 %tmp9 = icmp slt i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -4013,7 +4011,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_pcmpu_v4i32(i32* %arg) { +define void @bcast_unfold_pcmpu_v4i32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_pcmpu_v4i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: xorl %eax, %eax @@ -4035,13 +4033,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i32, i32* %arg, i64 %tmp - %tmp3 = bitcast i32* %tmp2 to <4 x i32>* - %tmp4 = load <4 x i32>, <4 x i32>* %tmp3, align 4 + %tmp2 = getelementptr inbounds i32, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <4 x i32>, ptr %tmp3, align 4 %tmp5 = icmp ult <4 x i32> %tmp4, %tmp6 = select <4 x i1> %tmp5, <4 x i32> , 
<4 x i32> %tmp4 - %tmp7 = bitcast i32* %tmp2 to <4 x i32>* - store <4 x i32> %tmp6, <4 x i32>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <4 x i32> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 4 %tmp9 = icmp ult i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -4050,7 +4048,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_pcmpu_v8i32(i32* %arg) { +define void @bcast_unfold_pcmpu_v8i32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_pcmpu_v8i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: xorl %eax, %eax @@ -4073,13 +4071,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i32, i32* %arg, i64 %tmp - %tmp3 = bitcast i32* %tmp2 to <8 x i32>* - %tmp4 = load <8 x i32>, <8 x i32>* %tmp3, align 4 + %tmp2 = getelementptr inbounds i32, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <8 x i32>, ptr %tmp3, align 4 %tmp5 = icmp ult <8 x i32> %tmp4, %tmp6 = select <8 x i1> %tmp5, <8 x i32> , <8 x i32> %tmp4 - %tmp7 = bitcast i32* %tmp2 to <8 x i32>* - store <8 x i32> %tmp6, <8 x i32>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <8 x i32> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 8 %tmp9 = icmp ult i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -4088,7 +4086,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_pcmpu_v16i32(i32* %arg) { +define void @bcast_unfold_pcmpu_v16i32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_pcmpu_v16i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: xorl %eax, %eax @@ -4111,13 +4109,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i32, i32* %arg, i64 %tmp - %tmp3 = bitcast i32* %tmp2 to <16 x i32>* - %tmp4 = load <16 x i32>, <16 x i32>* %tmp3, align 4 + %tmp2 = getelementptr inbounds i32, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <16 x i32>, ptr %tmp3, align 4 %tmp5 = icmp ult <16 x i32> %tmp4, %tmp6 = select <16 x i1> %tmp5, <16 x i32> , <16 x i32> %tmp4 - %tmp7 = bitcast i32* %tmp2 to <16 x i32>* - store <16 x i32> %tmp6, <16 x i32>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <16 x i32> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 16 %tmp9 = icmp ult i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -4126,7 +4124,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_pcmpu_v2i64(i64* %arg) { +define void @bcast_unfold_pcmpu_v2i64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_pcmpu_v2i64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: xorl %eax, %eax @@ -4148,13 +4146,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i64, i64* %arg, i64 %tmp - %tmp3 = bitcast i64* %tmp2 to <2 x i64>* - %tmp4 = load <2 x i64>, <2 x i64>* %tmp3, align 4 + %tmp2 = getelementptr inbounds i64, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <2 x i64>, ptr %tmp3, align 4 %tmp5 = icmp ult <2 x i64> %tmp4, %tmp6 = select <2 x i1> %tmp5, <2 x i64> , <2 x i64> %tmp4 - %tmp7 = bitcast i64* %tmp2 to <2 x i64>* - store <2 x i64> %tmp6, <2 x i64>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <2 x i64> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 2 %tmp9 = icmp ult i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -4162,7 +4160,7 @@ bb1: ; preds = %bb1, %bb bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_pcmpu_v4i64(i64* %arg) { +define void @bcast_unfold_pcmpu_v4i64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_pcmpu_v4i64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: 
xorl %eax, %eax @@ -4185,13 +4183,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i64, i64* %arg, i64 %tmp - %tmp3 = bitcast i64* %tmp2 to <4 x i64>* - %tmp4 = load <4 x i64>, <4 x i64>* %tmp3, align 4 + %tmp2 = getelementptr inbounds i64, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <4 x i64>, ptr %tmp3, align 4 %tmp5 = icmp ult <4 x i64> %tmp4, %tmp6 = select <4 x i1> %tmp5, <4 x i64> , <4 x i64> %tmp4 - %tmp7 = bitcast i64* %tmp2 to <4 x i64>* - store <4 x i64> %tmp6, <4 x i64>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <4 x i64> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 4 %tmp9 = icmp ult i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -4200,7 +4198,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_pcmpu_v8i64(i64* %arg) { +define void @bcast_unfold_pcmpu_v8i64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_pcmpu_v8i64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: xorl %eax, %eax @@ -4223,13 +4221,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i64, i64* %arg, i64 %tmp - %tmp3 = bitcast i64* %tmp2 to <8 x i64>* - %tmp4 = load <8 x i64>, <8 x i64>* %tmp3, align 4 + %tmp2 = getelementptr inbounds i64, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <8 x i64>, ptr %tmp3, align 4 %tmp5 = icmp ult <8 x i64> %tmp4, %tmp6 = select <8 x i1> %tmp5, <8 x i64> , <8 x i64> %tmp4 - %tmp7 = bitcast i64* %tmp2 to <8 x i64>* - store <8 x i64> %tmp6, <8 x i64>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <8 x i64> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 8 %tmp9 = icmp ult i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -4238,7 +4236,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_cmp_v4f32(float* %arg) { +define void @bcast_unfold_cmp_v4f32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_cmp_v4f32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -4260,13 +4258,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds float, float* %arg, i64 %tmp - %tmp3 = bitcast float* %tmp2 to <4 x float>* - %tmp4 = load <4 x float>, <4 x float>* %tmp3, align 4 + %tmp2 = getelementptr inbounds float, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <4 x float>, ptr %tmp3, align 4 %tmp5 = fcmp olt <4 x float> %tmp4, %tmp6 = select <4 x i1> %tmp5, <4 x float> %tmp4, <4 x float> - %tmp7 = bitcast float* %tmp2 to <4 x float>* - store <4 x float> %tmp6, <4 x float>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <4 x float> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 4 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -4275,7 +4273,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_cmp_v8f32(float* %arg) { +define void @bcast_unfold_cmp_v8f32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_cmp_v8f32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -4298,13 +4296,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds float, float* %arg, i64 %tmp - %tmp3 = bitcast float* %tmp2 to <8 x float>* - %tmp4 = load <8 x float>, <8 x float>* %tmp3, align 4 + %tmp2 = getelementptr inbounds float, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <8 x float>, ptr %tmp3, align 4 %tmp5 = fcmp olt <8 x float> %tmp4, %tmp6 = select <8 x i1> %tmp5, <8 x float> %tmp4, 
<8 x float> - %tmp7 = bitcast float* %tmp2 to <8 x float>* - store <8 x float> %tmp6, <8 x float>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <8 x float> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 8 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -4313,7 +4311,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_cmp_v16f32(float* %arg) { +define void @bcast_unfold_cmp_v16f32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_cmp_v16f32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -4336,13 +4334,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds float, float* %arg, i64 %tmp - %tmp3 = bitcast float* %tmp2 to <16 x float>* - %tmp4 = load <16 x float>, <16 x float>* %tmp3, align 4 + %tmp2 = getelementptr inbounds float, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <16 x float>, ptr %tmp3, align 4 %tmp5 = fcmp olt <16 x float> %tmp4, %tmp6 = select <16 x i1> %tmp5, <16 x float> %tmp4, <16 x float> - %tmp7 = bitcast float* %tmp2 to <16 x float>* - store <16 x float> %tmp6, <16 x float>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <16 x float> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 16 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -4351,7 +4349,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_cmp_v2f64(double* %arg) { +define void @bcast_unfold_cmp_v2f64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_cmp_v2f64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -4375,13 +4373,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds double, double* %arg, i64 %tmp - %tmp3 = bitcast double* %tmp2 to <2 x double>* - %tmp4 = load <2 x double>, <2 x double>* %tmp3, align 8 + %tmp2 = getelementptr inbounds double, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <2 x double>, ptr %tmp3, align 8 %tmp5 = fcmp olt <2 x double> %tmp4, %tmp6 = select <2 x i1> %tmp5, <2 x double> %tmp4, <2 x double> - %tmp7 = bitcast double* %tmp2 to <2 x double>* - store <2 x double> %tmp6, <2 x double>* %tmp7, align 8 + %tmp7 = bitcast ptr %tmp2 to ptr + store <2 x double> %tmp6, ptr %tmp7, align 8 %tmp8 = add i64 %tmp, 2 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -4390,7 +4388,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_cmp_v4f64(double* %arg) { +define void @bcast_unfold_cmp_v4f64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_cmp_v4f64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -4413,13 +4411,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds double, double* %arg, i64 %tmp - %tmp3 = bitcast double* %tmp2 to <4 x double>* - %tmp4 = load <4 x double>, <4 x double>* %tmp3, align 8 + %tmp2 = getelementptr inbounds double, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <4 x double>, ptr %tmp3, align 8 %tmp5 = fcmp olt <4 x double> %tmp4, %tmp6 = select <4 x i1> %tmp5, <4 x double> %tmp4, <4 x double> - %tmp7 = bitcast double* %tmp2 to <4 x double>* - store <4 x double> %tmp6, <4 x double>* %tmp7, align 8 + %tmp7 = bitcast ptr %tmp2 to ptr + store <4 x double> %tmp6, ptr %tmp7, align 8 %tmp8 = add i64 %tmp, 4 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -4428,7 +4426,7 @@ bb10: ; preds = %bb1 ret void } -define void 
@bcast_unfold_cmp_v8f64(double* %arg) { +define void @bcast_unfold_cmp_v8f64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_cmp_v8f64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -4451,13 +4449,13 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds double, double* %arg, i64 %tmp - %tmp3 = bitcast double* %tmp2 to <8 x double>* - %tmp4 = load <8 x double>, <8 x double>* %tmp3, align 8 + %tmp2 = getelementptr inbounds double, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <8 x double>, ptr %tmp3, align 8 %tmp5 = fcmp olt <8 x double> %tmp4, %tmp6 = select <8 x i1> %tmp5, <8 x double> %tmp4, <8 x double> - %tmp7 = bitcast double* %tmp2 to <8 x double>* - store <8 x double> %tmp6, <8 x double>* %tmp7, align 8 + %tmp7 = bitcast ptr %tmp2 to ptr + store <8 x double> %tmp6, ptr %tmp7, align 8 %tmp8 = add i64 %tmp, 8 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -4466,7 +4464,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_cmp_v8f32_refold(float* nocapture %0) { +define void @bcast_unfold_cmp_v8f32_refold(ptr nocapture %0) { ; CHECK-LABEL: bcast_unfold_cmp_v8f32_refold: ; CHECK: # %bb.0: ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -4486,13 +4484,13 @@ define void @bcast_unfold_cmp_v8f32_refold(float* nocapture %0) { 2: ; preds = %2, %1 %3 = phi i64 [ 0, %1 ], [ %10, %2 ] - %4 = getelementptr inbounds float, float* %0, i64 %3 - %5 = bitcast float* %4 to <8 x float>* - %6 = load <8 x float>, <8 x float>* %5, align 4 + %4 = getelementptr inbounds float, ptr %0, i64 %3 + %5 = bitcast ptr %4 to ptr + %6 = load <8 x float>, ptr %5, align 4 %7 = fcmp olt <8 x float> %6, %8 = select <8 x i1> %7, <8 x float> , <8 x float> - %9 = bitcast float* %4 to <8 x float>* - store <8 x float> %8, <8 x float>* %9, align 4 + %9 = bitcast ptr %4 to ptr + store <8 x float> %8, ptr %9, align 4 %10 = add i64 %3, 8 %11 = icmp eq i64 %10, 1024 br i1 %11, label %12, label %2 @@ -4501,7 +4499,7 @@ define void @bcast_unfold_cmp_v8f32_refold(float* nocapture %0) { ret void } -define void @bcast_unfold_ptestm_v4i32(i32* %arg) { +define void @bcast_unfold_ptestm_v4i32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_ptestm_v4i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -4522,14 +4520,14 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i32, i32* %arg, i64 %tmp - %tmp3 = bitcast i32* %tmp2 to <4 x i32>* - %tmp4 = load <4 x i32>, <4 x i32>* %tmp3, align 4 + %tmp2 = getelementptr inbounds i32, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <4 x i32>, ptr %tmp3, align 4 %tmp4b = and <4 x i32> %tmp4, %tmp5 = icmp ne <4 x i32> %tmp4b, zeroinitializer %tmp6 = select <4 x i1> %tmp5, <4 x i32> , <4 x i32> %tmp4 - %tmp7 = bitcast i32* %tmp2 to <4 x i32>* - store <4 x i32> %tmp6, <4 x i32>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <4 x i32> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 4 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -4538,7 +4536,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_ptestnm_v4i32(i32* %arg) { +define void @bcast_unfold_ptestnm_v4i32(ptr %arg) { ; CHECK-LABEL: bcast_unfold_ptestnm_v4i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -4559,14 +4557,14 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i32, i32* %arg, i64 
%tmp - %tmp3 = bitcast i32* %tmp2 to <4 x i32>* - %tmp4 = load <4 x i32>, <4 x i32>* %tmp3, align 4 + %tmp2 = getelementptr inbounds i32, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <4 x i32>, ptr %tmp3, align 4 %tmp4b = and <4 x i32> %tmp4, %tmp5 = icmp eq <4 x i32> %tmp4b, zeroinitializer %tmp6 = select <4 x i1> %tmp5, <4 x i32> , <4 x i32> %tmp4 - %tmp7 = bitcast i32* %tmp2 to <4 x i32>* - store <4 x i32> %tmp6, <4 x i32>* %tmp7, align 4 + %tmp7 = bitcast ptr %tmp2 to ptr + store <4 x i32> %tmp6, ptr %tmp7, align 4 %tmp8 = add i64 %tmp, 4 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -4575,7 +4573,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_ptestm_v4i64(i64* %arg) { +define void @bcast_unfold_ptestm_v4i64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_ptestm_v4i64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -4597,14 +4595,14 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i64, i64* %arg, i64 %tmp - %tmp3 = bitcast i64* %tmp2 to <4 x i64>* - %tmp4 = load <4 x i64>, <4 x i64>* %tmp3, align 8 + %tmp2 = getelementptr inbounds i64, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <4 x i64>, ptr %tmp3, align 8 %tmp4b = and <4 x i64> %tmp4, %tmp5 = icmp ne <4 x i64> %tmp4b, zeroinitializer %tmp6 = select <4 x i1> %tmp5, <4 x i64> , <4 x i64> %tmp4 - %tmp7 = bitcast i64* %tmp2 to <4 x i64>* - store <4 x i64> %tmp6, <4 x i64>* %tmp7, align 8 + %tmp7 = bitcast ptr %tmp2 to ptr + store <4 x i64> %tmp6, ptr %tmp7, align 8 %tmp8 = add i64 %tmp, 4 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -4613,7 +4611,7 @@ bb10: ; preds = %bb1 ret void } -define void @bcast_unfold_ptestnm_v4i64(i64* %arg) { +define void @bcast_unfold_ptestnm_v4i64(ptr %arg) { ; CHECK-LABEL: bcast_unfold_ptestnm_v4i64: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-8192, %rax # imm = 0xE000 @@ -4635,14 +4633,14 @@ bb: bb1: ; preds = %bb1, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb1 ] - %tmp2 = getelementptr inbounds i64, i64* %arg, i64 %tmp - %tmp3 = bitcast i64* %tmp2 to <4 x i64>* - %tmp4 = load <4 x i64>, <4 x i64>* %tmp3, align 8 + %tmp2 = getelementptr inbounds i64, ptr %arg, i64 %tmp + %tmp3 = bitcast ptr %tmp2 to ptr + %tmp4 = load <4 x i64>, ptr %tmp3, align 8 %tmp4b = and <4 x i64> %tmp4, %tmp5 = icmp eq <4 x i64> %tmp4b, zeroinitializer %tmp6 = select <4 x i1> %tmp5, <4 x i64> , <4 x i64> %tmp4 - %tmp7 = bitcast i64* %tmp2 to <4 x i64>* - store <4 x i64> %tmp6, <4 x i64>* %tmp7, align 8 + %tmp7 = bitcast ptr %tmp2 to ptr + store <4 x i64> %tmp6, ptr %tmp7, align 8 %tmp8 = add i64 %tmp, 4 %tmp9 = icmp eq i64 %tmp8, 1024 br i1 %tmp9, label %bb10, label %bb1 @@ -4654,7 +4652,7 @@ bb10: ; preds = %bb1 ; The or/and pattern here should be turned into vpternlog. The multiply is ; there to increase the use count of the loads so they can't fold. We want to ; unfold the broadcast and pull it out of the loop. 
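; Editorial sketch, not part of the original test: vpternlog evaluates an
; arbitrary three-input boolean function selected by an 8-bit immediate, so
; when the two and-masks are bitwise complements the whole (x & C) | (y & ~C)
; expression is a function of three bit inputs and fits a single instruction
; once the broadcast constant is hoisted into a register. A commented scalar
; sketch with hypothetical masks (the actual splat constants are not shown in
; this excerpt):
;   %lo = and i32 %x, 65535       ; x & C
;   %hi = and i32 %y, -65536      ; y & ~C
;   %r  = or i32 %lo, %hi         ; one vpternlog candidate when vectorized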
-define void @bcast_unfold_vpternlog_v16i32(i32* %arg, i32* %arg1) { +define void @bcast_unfold_vpternlog_v16i32(ptr %arg, ptr %arg1) { ; CHECK-LABEL: bcast_unfold_vpternlog_v16i32: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000 @@ -4678,19 +4676,19 @@ bb: bb2: ; preds = %bb2, %bb %tmp = phi i64 [ 0, %bb ], [ %tmp18, %bb2 ] - %tmp3 = getelementptr inbounds i32, i32* %arg, i64 %tmp - %tmp4 = bitcast i32* %tmp3 to <16 x i32>* - %tmp5 = load <16 x i32>, <16 x i32>* %tmp4, align 4 - %tmp6 = getelementptr inbounds i32, i32* %arg1, i64 %tmp - %tmp10 = bitcast i32* %tmp6 to <16 x i32>* - %tmp11 = load <16 x i32>, <16 x i32>* %tmp10, align 4 + %tmp3 = getelementptr inbounds i32, ptr %arg, i64 %tmp + %tmp4 = bitcast ptr %tmp3 to ptr + %tmp5 = load <16 x i32>, ptr %tmp4, align 4 + %tmp6 = getelementptr inbounds i32, ptr %arg1, i64 %tmp + %tmp10 = bitcast ptr %tmp6 to ptr + %tmp11 = load <16 x i32>, ptr %tmp10, align 4 %tmp12 = and <16 x i32> %tmp5, %tmp13 = and <16 x i32> %tmp11, %tmp14 = or <16 x i32> %tmp12, %tmp13 %tmp15 = mul <16 x i32> %tmp14, %tmp5 %tmp16 = mul <16 x i32> %tmp15, %tmp11 - %tmp17 = bitcast i32* %tmp3 to <16 x i32>* - store <16 x i32> %tmp16, <16 x i32>* %tmp17, align 4 + %tmp17 = bitcast ptr %tmp3 to ptr + store <16 x i32> %tmp16, ptr %tmp17, align 4 %tmp18 = add i64 %tmp, 16 %tmp19 = icmp eq i64 %tmp18, 1024 br i1 %tmp19, label %bb20, label %bb2 diff --git a/llvm/test/CodeGen/X86/block-placement.ll b/llvm/test/CodeGen/X86/block-placement.ll index b2a0344392c9b..156e9a8a1edf0 100644 --- a/llvm/test/CodeGen/X86/block-placement.ll +++ b/llvm/test/CodeGen/X86/block-placement.ll @@ -1,11 +1,9 @@ -; ifndef INTEL_SYCL_OPAQUEPOINTER_READY ; RUN: llc -mtriple=i686-linux -pre-RA-sched=source < %s | FileCheck %s ; RUN: opt -disable-output -passes=debugify < %s -; end declare void @error(i32 %i, i32 %a, i32 %b) -define i32 @test_ifchains(i32 %i, i32* %a, i32 %b) { +define i32 @test_ifchains(i32 %i, ptr %a, i32 %b) { ; Test a chain of ifs, where the block guarded by the if is error handling code ; that is not expected to run. 
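; Editorial note, not part of the original test: the !prof !0 metadata on
; these branches (defined further down as branch_weights of 1 and 64) is read
; as a taken probability of roughly 1 / (1 + 64), about 1.5%, so every %thenN
; error block is considered cold and laid out after the hot fall-through
; chain; %then5 is the last block the CHECK lines expect.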
; CHECK-LABEL: test_ifchains: @@ -27,8 +25,8 @@ define i32 @test_ifchains(i32 %i, i32* %a, i32 %b) { ; CHECK: %then5 entry: - %gep1 = getelementptr i32, i32* %a, i32 1 - %val1 = load i32, i32* %gep1 + %gep1 = getelementptr i32, ptr %a, i32 1 + %val1 = load i32, ptr %gep1 %cond1 = icmp ugt i32 %val1, 1 br i1 %cond1, label %then1, label %else1, !prof !0 @@ -37,8 +35,8 @@ then1: br label %else1 else1: - %gep2 = getelementptr i32, i32* %a, i32 2 - %val2 = load i32, i32* %gep2 + %gep2 = getelementptr i32, ptr %a, i32 2 + %val2 = load i32, ptr %gep2 %cond2 = icmp ugt i32 %val2, 2 br i1 %cond2, label %then2, label %else2, !prof !0 @@ -47,8 +45,8 @@ then2: br label %else2 else2: - %gep3 = getelementptr i32, i32* %a, i32 3 - %val3 = load i32, i32* %gep3 + %gep3 = getelementptr i32, ptr %a, i32 3 + %val3 = load i32, ptr %gep3 %cond3 = icmp ugt i32 %val3, 3 br i1 %cond3, label %then3, label %else3, !prof !0 @@ -57,8 +55,8 @@ then3: br label %else3 else3: - %gep4 = getelementptr i32, i32* %a, i32 4 - %val4 = load i32, i32* %gep4 + %gep4 = getelementptr i32, ptr %a, i32 4 + %val4 = load i32, ptr %gep4 %cond4 = icmp ugt i32 %val4, 4 br i1 %cond4, label %then4, label %else4, !prof !0 @@ -67,8 +65,8 @@ then4: br label %else4 else4: - %gep5 = getelementptr i32, i32* %a, i32 3 - %val5 = load i32, i32* %gep5 + %gep5 = getelementptr i32, ptr %a, i32 3 + %val5 = load i32, ptr %gep5 %cond5 = icmp ugt i32 %val5, 3 br i1 %cond5, label %then5, label %exit, !prof !0 @@ -80,7 +78,7 @@ exit: ret i32 %b } -define i32 @test_loop_cold_blocks(i32 %i, i32* %a) { +define i32 @test_loop_cold_blocks(i32 %i, ptr %a) { ; Check that we sink cold loop blocks after the hot loop body. ; CHECK-LABEL: test_loop_cold_blocks: ; CHECK: %entry @@ -116,8 +114,8 @@ unlikely2: br label %body3 body3: - %arrayidx = getelementptr inbounds i32, i32* %a, i32 %iv - %0 = load i32, i32* %arrayidx + %arrayidx = getelementptr inbounds i32, ptr %a, i32 %iv + %0 = load i32, ptr %arrayidx %sum = add nsw i32 %0, %base %next = add i32 %iv, 1 %exitcond = icmp eq i32 %next, %i @@ -129,7 +127,7 @@ exit: !0 = !{!"branch_weights", i32 1, i32 64} -define i32 @test_loop_early_exits(i32 %i, i32* %a) { +define i32 @test_loop_early_exits(i32 %i, ptr %a) { ; Check that we sink early exit blocks out of loop bodies. ; CHECK-LABEL: test_loop_early_exits: ; CHECK: %entry @@ -169,8 +167,8 @@ bail3: ret i32 -3 body4: - %arrayidx = getelementptr inbounds i32, i32* %a, i32 %iv - %0 = load i32, i32* %arrayidx + %arrayidx = getelementptr inbounds i32, ptr %a, i32 %iv + %0 = load i32, ptr %arrayidx %sum = add nsw i32 %0, %base %next = add i32 %iv, 1 %exitcond = icmp eq i32 %next, %i @@ -186,7 +184,7 @@ exit: ; duplicated, we add some calls to dummy. declare void @dummy() -define i32 @test_loop_rotate(i32 %i, i32* %a) { +define i32 @test_loop_rotate(i32 %i, ptr %a) { ; Check that we rotate conditional exits from the loop to the bottom of the ; loop, eliminating unconditional branches to the top. 
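; Editorial note, not part of the original test: the rotation checked here is
; a layout rotation in block placement, not the IR-level loop-rotate pass.
; With the exit test in %body0 and the unconditional backedge in %body1, an
; order such as %entry, %body1, %body0, %exit (illustrative) turns %body1's
; unconditional branch into a fall-through and leaves the conditional exit
; branch at the bottom of the loop.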
; CHECK-LABEL: test_loop_rotate: @@ -208,8 +206,8 @@ body0: br i1 %exitcond, label %exit, label %body1 body1: - %arrayidx = getelementptr inbounds i32, i32* %a, i32 %iv - %0 = load i32, i32* %arrayidx + %arrayidx = getelementptr inbounds i32, ptr %a, i32 %iv + %0 = load i32, ptr %arrayidx %sum = add nsw i32 %0, %base %bailcond1 = icmp eq i32 %sum, 42 br label %body0 @@ -218,7 +216,7 @@ exit: ret i32 %base } -define i32 @test_no_loop_rotate(i32 %i, i32* %a) { +define i32 @test_no_loop_rotate(i32 %i, ptr %a) { ; Check that we don't try to rotate a loop which is already laid out with ; fallthrough opportunities into the top and out of the bottom. ; CHECK-LABEL: test_no_loop_rotate: @@ -233,8 +231,8 @@ entry: body0: %iv = phi i32 [ 0, %entry ], [ %next, %body1 ] %base = phi i32 [ 0, %entry ], [ %sum, %body1 ] - %arrayidx = getelementptr inbounds i32, i32* %a, i32 %iv - %0 = load i32, i32* %arrayidx + %arrayidx = getelementptr inbounds i32, ptr %a, i32 %iv + %0 = load i32, ptr %arrayidx %sum = add nsw i32 %0, %base %bailcond1 = icmp eq i32 %sum, 42 br i1 %bailcond1, label %exit, label %body1 @@ -248,7 +246,7 @@ exit: ret i32 %base } -define i32 @test_loop_align(i32 %i, i32* %a) { +define i32 @test_loop_align(i32 %i, ptr %a) { ; Check that we provide basic loop body alignment with the block placement ; pass. ; CHECK-LABEL: test_loop_align: @@ -263,8 +261,8 @@ entry: body: %iv = phi i32 [ 0, %entry ], [ %next, %body ] %base = phi i32 [ 0, %entry ], [ %sum, %body ] - %arrayidx = getelementptr inbounds i32, i32* %a, i32 %iv - %0 = load i32, i32* %arrayidx + %arrayidx = getelementptr inbounds i32, ptr %a, i32 %iv + %0 = load i32, ptr %arrayidx %sum = add nsw i32 %0, %base %next = add i32 %iv, 1 %exitcond = icmp eq i32 %next, %i @@ -274,7 +272,7 @@ exit: ret i32 %sum } -define i32 @test_nested_loop_align(i32 %i, i32* %a, i32* %b) { +define i32 @test_nested_loop_align(i32 %i, ptr %a, ptr %b) { ; Check that we provide nested loop body alignment. 
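; Editorial note, illustrative only: alignment decided during block placement
; shows up in the emitted x86 assembly as a .p2align directive ahead of each
; aligned loop header, and for a nested loop the inner header is expected to
; get its own alignment, e.g. (assumed label name and alignment value):
;   .p2align 4, 0x90
;   .LBB_inner:                   ; %inner.loop.body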
; CHECK-LABEL: test_nested_loop_align: ; CHECK: %entry @@ -290,16 +288,16 @@ entry: loop.body.1: %iv = phi i32 [ 0, %entry ], [ %next, %loop.body.2 ] - %arrayidx = getelementptr inbounds i32, i32* %a, i32 %iv - %bidx = load i32, i32* %arrayidx + %arrayidx = getelementptr inbounds i32, ptr %a, i32 %iv + %bidx = load i32, ptr %arrayidx br label %inner.loop.body inner.loop.body: %inner.iv = phi i32 [ 0, %loop.body.1 ], [ %inner.next, %inner.loop.body ] %base = phi i32 [ 0, %loop.body.1 ], [ %sum, %inner.loop.body ] %scaled_idx = mul i32 %bidx, %iv - %inner.arrayidx = getelementptr inbounds i32, i32* %b, i32 %scaled_idx - %0 = load i32, i32* %inner.arrayidx + %inner.arrayidx = getelementptr inbounds i32, ptr %b, i32 %scaled_idx + %0 = load i32, ptr %inner.arrayidx %sum = add nsw i32 %0, %base %inner.next = add i32 %iv, 1 %inner.exitcond = icmp eq i32 %inner.next, %i @@ -333,13 +331,13 @@ loop.body1: br i1 undef, label %loop.body3, label %loop.body2 loop.body2: - %ptr = load i32*, i32** undef, align 4 + %ptr = load ptr, ptr undef, align 4 br label %loop.body3 loop.body3: - %myptr = phi i32* [ %ptr2, %loop.body5 ], [ %ptr, %loop.body2 ], [ undef, %loop.body1 ] - %bcmyptr = bitcast i32* %myptr to i32* - %val = load i32, i32* %bcmyptr, align 4 + %myptr = phi ptr [ %ptr2, %loop.body5 ], [ %ptr, %loop.body2 ], [ undef, %loop.body1 ] + %bcmyptr = bitcast ptr %myptr to ptr + %val = load i32, ptr %bcmyptr, align 4 %comp = icmp eq i32 %val, 48 br i1 %comp, label %loop.body4, label %loop.body5 @@ -347,11 +345,11 @@ loop.body4: br i1 undef, label %loop.header, label %loop.body5 loop.body5: - %ptr2 = load i32*, i32** undef, align 4 + %ptr2 = load ptr, ptr undef, align 4 br label %loop.body3 } -define void @unnatural_cfg2(i32* %p0, i32 %a0) { +define void @unnatural_cfg2(ptr %p0, i32 %a0) { ; Test that we can handle a loop with a nested natural loop *and* an unnatural ; loop. This was reduced from a crash on block placement when run over ; single-source GCC. 
@@ -371,32 +369,32 @@ entry: br label %loop.header loop.header: - %comp0 = icmp eq i32* %p0, null + %comp0 = icmp eq ptr %p0, null br i1 %comp0, label %bail, label %loop.body1 loop.body1: - %val0 = load i32*, i32** undef, align 4 + %val0 = load ptr, ptr undef, align 4 br i1 undef, label %loop.body2, label %loop.inner1.begin loop.body2: br i1 undef, label %loop.body4, label %loop.body3 loop.body3: - %ptr1 = getelementptr inbounds i32, i32* %val0, i32 0 - %castptr1 = bitcast i32* %ptr1 to i32** - %val1 = load i32*, i32** %castptr1, align 4 + %ptr1 = getelementptr inbounds i32, ptr %val0, i32 0 + %castptr1 = bitcast ptr %ptr1 to ptr + %val1 = load ptr, ptr %castptr1, align 4 br label %loop.inner1.begin loop.inner1.begin: - %valphi = phi i32* [ %val2, %loop.inner1.end ], [ %val1, %loop.body3 ], [ %val0, %loop.body1 ] - %castval = bitcast i32* %valphi to i32* + %valphi = phi ptr [ %val2, %loop.inner1.end ], [ %val1, %loop.body3 ], [ %val0, %loop.body1 ] + %castval = bitcast ptr %valphi to ptr %comp1 = icmp eq i32 %a0, 48 br i1 %comp1, label %loop.inner1.end, label %loop.body4 loop.inner1.end: - %ptr2 = getelementptr inbounds i32, i32* %valphi, i32 0 - %castptr2 = bitcast i32* %ptr2 to i32** - %val2 = load i32*, i32** %castptr2, align 4 + %ptr2 = getelementptr inbounds i32, ptr %valphi, i32 0 + %castptr2 = bitcast ptr %ptr2 to ptr + %val2 = load ptr, ptr %castptr2, align 4 br label %loop.inner1.begin loop.body4.dead: @@ -493,7 +491,7 @@ entry: br i1 %cond, label %entry.if.then_crit_edge, label %lor.lhs.false, !prof !1 entry.if.then_crit_edge: - %.pre14 = load i8, i8* undef, align 1 + %.pre14 = load i8, ptr undef, align 1 br label %if.then lor.lhs.false: @@ -506,7 +504,7 @@ exit: if.then: %0 = phi i8 [ %.pre14, %entry.if.then_crit_edge ], [ undef, %exit ] %1 = and i8 %0, 1 - store i8 %1, i8* undef, align 4 + store i8 %1, ptr undef, align 4 br label %if.end if.end: @@ -554,7 +552,7 @@ exit: declare i32 @__gxx_personality_v0(...) -define void @test_eh_lpad_successor() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +define void @test_eh_lpad_successor() personality ptr bitcast (i32 (...)* @__gxx_personality_v0 to ptr) { ; Some times the landing pad ends up as the first successor of an invoke block. ; When this happens, a strange result used to fall out of updateTerminators: we ; didn't correctly locate the fallthrough successor, assuming blindly that the @@ -572,9 +570,9 @@ preheader: br label %loop lpad: - %lpad.val = landingpad { i8*, i32 } + %lpad.val = landingpad { ptr, i32 } cleanup - resume { i8*, i32 } %lpad.val + resume { ptr, i32 } %lpad.val loop: br label %loop @@ -582,7 +580,7 @@ loop: declare void @fake_throw() noreturn -define void @test_eh_throw() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +define void @test_eh_throw() personality ptr bitcast (i32 (...)* @__gxx_personality_v0 to ptr) { ; For blocks containing a 'throw' (or similar functionality), we have ; a no-return invoke. In this case, only EH successors will exist, and ; fallthrough simply won't occur. 
Make sure we don't crash trying to update @@ -599,7 +597,7 @@ continue: unreachable cleanup: - %0 = landingpad { i8*, i32 } + %0 = landingpad { ptr, i32 } cleanup unreachable } @@ -622,24 +620,24 @@ body: br label %loop2a loop1: - %next.load = load i32*, i32** undef + %next.load = load ptr, ptr undef br i1 %comp.a, label %loop2a, label %loop2b loop2a: - %var = phi i32* [ null, %entry ], [ null, %body ], [ %next.phi, %loop1 ] - %next.var = phi i32* [ null, %entry ], [ undef, %body ], [ %next.load, %loop1 ] - %comp.a = icmp eq i32* %var, null + %var = phi ptr [ null, %entry ], [ null, %body ], [ %next.phi, %loop1 ] + %next.var = phi ptr [ null, %entry ], [ undef, %body ], [ %next.load, %loop1 ] + %comp.a = icmp eq ptr %var, null br label %loop3 loop2b: - %gep = getelementptr inbounds i32, i32* %var.phi, i32 0 - %next.ptr = bitcast i32* %gep to i32** - store i32* %next.phi, i32** %next.ptr + %gep = getelementptr inbounds i32, ptr %var.phi, i32 0 + %next.ptr = bitcast ptr %gep to ptr + store ptr %next.phi, ptr %next.ptr br label %loop3 loop3: - %var.phi = phi i32* [ %next.phi, %loop2b ], [ %var, %loop2a ] - %next.phi = phi i32* [ %next.load, %loop2b ], [ %next.var, %loop2a ] + %var.phi = phi ptr [ %next.phi, %loop2b ], [ %var, %loop2a ] + %next.phi = phi ptr [ %next.load, %loop2b ], [ %next.var, %loop2a ] br label %loop1 } @@ -737,199 +735,199 @@ define void @many_unanalyzable_branches() { entry: br label %0 - %val0 = load volatile float, float* undef + %val0 = load volatile float, ptr undef %cmp0 = fcmp une float %val0, 0.0 br i1 %cmp0, label %1, label %0 - %val1 = load volatile float, float* undef + %val1 = load volatile float, ptr undef %cmp1 = fcmp une float %val1, 0.0 br i1 %cmp1, label %2, label %1 - %val2 = load volatile float, float* undef + %val2 = load volatile float, ptr undef %cmp2 = fcmp une float %val2, 0.0 br i1 %cmp2, label %3, label %2 - %val3 = load volatile float, float* undef + %val3 = load volatile float, ptr undef %cmp3 = fcmp une float %val3, 0.0 br i1 %cmp3, label %4, label %3 - %val4 = load volatile float, float* undef + %val4 = load volatile float, ptr undef %cmp4 = fcmp une float %val4, 0.0 br i1 %cmp4, label %5, label %4 - %val5 = load volatile float, float* undef + %val5 = load volatile float, ptr undef %cmp5 = fcmp une float %val5, 0.0 br i1 %cmp5, label %6, label %5 - %val6 = load volatile float, float* undef + %val6 = load volatile float, ptr undef %cmp6 = fcmp une float %val6, 0.0 br i1 %cmp6, label %7, label %6 - %val7 = load volatile float, float* undef + %val7 = load volatile float, ptr undef %cmp7 = fcmp une float %val7, 0.0 br i1 %cmp7, label %8, label %7 - %val8 = load volatile float, float* undef + %val8 = load volatile float, ptr undef %cmp8 = fcmp une float %val8, 0.0 br i1 %cmp8, label %9, label %8 - %val9 = load volatile float, float* undef + %val9 = load volatile float, ptr undef %cmp9 = fcmp une float %val9, 0.0 br i1 %cmp9, label %10, label %9 - %val10 = load volatile float, float* undef + %val10 = load volatile float, ptr undef %cmp10 = fcmp une float %val10, 0.0 br i1 %cmp10, label %11, label %10 - %val11 = load volatile float, float* undef + %val11 = load volatile float, ptr undef %cmp11 = fcmp une float %val11, 0.0 br i1 %cmp11, label %12, label %11 - %val12 = load volatile float, float* undef + %val12 = load volatile float, ptr undef %cmp12 = fcmp une float %val12, 0.0 br i1 %cmp12, label %13, label %12 - %val13 = load volatile float, float* undef + %val13 = load volatile float, ptr undef %cmp13 = fcmp une float %val13, 0.0 br i1 %cmp13, 
label %14, label %13 - %val14 = load volatile float, float* undef + %val14 = load volatile float, ptr undef %cmp14 = fcmp une float %val14, 0.0 br i1 %cmp14, label %15, label %14 - %val15 = load volatile float, float* undef + %val15 = load volatile float, ptr undef %cmp15 = fcmp une float %val15, 0.0 br i1 %cmp15, label %16, label %15 - %val16 = load volatile float, float* undef + %val16 = load volatile float, ptr undef %cmp16 = fcmp une float %val16, 0.0 br i1 %cmp16, label %17, label %16 - %val17 = load volatile float, float* undef + %val17 = load volatile float, ptr undef %cmp17 = fcmp une float %val17, 0.0 br i1 %cmp17, label %18, label %17 - %val18 = load volatile float, float* undef + %val18 = load volatile float, ptr undef %cmp18 = fcmp une float %val18, 0.0 br i1 %cmp18, label %19, label %18 - %val19 = load volatile float, float* undef + %val19 = load volatile float, ptr undef %cmp19 = fcmp une float %val19, 0.0 br i1 %cmp19, label %20, label %19 - %val20 = load volatile float, float* undef + %val20 = load volatile float, ptr undef %cmp20 = fcmp une float %val20, 0.0 br i1 %cmp20, label %21, label %20 - %val21 = load volatile float, float* undef + %val21 = load volatile float, ptr undef %cmp21 = fcmp une float %val21, 0.0 br i1 %cmp21, label %22, label %21 - %val22 = load volatile float, float* undef + %val22 = load volatile float, ptr undef %cmp22 = fcmp une float %val22, 0.0 br i1 %cmp22, label %23, label %22 - %val23 = load volatile float, float* undef + %val23 = load volatile float, ptr undef %cmp23 = fcmp une float %val23, 0.0 br i1 %cmp23, label %24, label %23 - %val24 = load volatile float, float* undef + %val24 = load volatile float, ptr undef %cmp24 = fcmp une float %val24, 0.0 br i1 %cmp24, label %25, label %24 - %val25 = load volatile float, float* undef + %val25 = load volatile float, ptr undef %cmp25 = fcmp une float %val25, 0.0 br i1 %cmp25, label %26, label %25 - %val26 = load volatile float, float* undef + %val26 = load volatile float, ptr undef %cmp26 = fcmp une float %val26, 0.0 br i1 %cmp26, label %27, label %26 - %val27 = load volatile float, float* undef + %val27 = load volatile float, ptr undef %cmp27 = fcmp une float %val27, 0.0 br i1 %cmp27, label %28, label %27 - %val28 = load volatile float, float* undef + %val28 = load volatile float, ptr undef %cmp28 = fcmp une float %val28, 0.0 br i1 %cmp28, label %29, label %28 - %val29 = load volatile float, float* undef + %val29 = load volatile float, ptr undef %cmp29 = fcmp une float %val29, 0.0 br i1 %cmp29, label %30, label %29 - %val30 = load volatile float, float* undef + %val30 = load volatile float, ptr undef %cmp30 = fcmp une float %val30, 0.0 br i1 %cmp30, label %31, label %30 - %val31 = load volatile float, float* undef + %val31 = load volatile float, ptr undef %cmp31 = fcmp une float %val31, 0.0 br i1 %cmp31, label %32, label %31 - %val32 = load volatile float, float* undef + %val32 = load volatile float, ptr undef %cmp32 = fcmp une float %val32, 0.0 br i1 %cmp32, label %33, label %32 - %val33 = load volatile float, float* undef + %val33 = load volatile float, ptr undef %cmp33 = fcmp une float %val33, 0.0 br i1 %cmp33, label %34, label %33 - %val34 = load volatile float, float* undef + %val34 = load volatile float, ptr undef %cmp34 = fcmp une float %val34, 0.0 br i1 %cmp34, label %35, label %34 - %val35 = load volatile float, float* undef + %val35 = load volatile float, ptr undef %cmp35 = fcmp une float %val35, 0.0 br i1 %cmp35, label %36, label %35 - %val36 = load volatile float, float* undef + %val36 = 
load volatile float, ptr undef %cmp36 = fcmp une float %val36, 0.0 br i1 %cmp36, label %37, label %36 - %val37 = load volatile float, float* undef + %val37 = load volatile float, ptr undef %cmp37 = fcmp une float %val37, 0.0 br i1 %cmp37, label %38, label %37 - %val38 = load volatile float, float* undef + %val38 = load volatile float, ptr undef %cmp38 = fcmp une float %val38, 0.0 br i1 %cmp38, label %39, label %38 - %val39 = load volatile float, float* undef + %val39 = load volatile float, ptr undef %cmp39 = fcmp une float %val39, 0.0 br i1 %cmp39, label %40, label %39 - %val40 = load volatile float, float* undef + %val40 = load volatile float, ptr undef %cmp40 = fcmp une float %val40, 0.0 br i1 %cmp40, label %41, label %40 - %val41 = load volatile float, float* undef + %val41 = load volatile float, ptr undef %cmp41 = fcmp une float %val41, undef br i1 %cmp41, label %42, label %41 - %val42 = load volatile float, float* undef + %val42 = load volatile float, ptr undef %cmp42 = fcmp une float %val42, 0.0 br i1 %cmp42, label %43, label %42 - %val43 = load volatile float, float* undef + %val43 = load volatile float, ptr undef %cmp43 = fcmp une float %val43, 0.0 br i1 %cmp43, label %44, label %43 - %val44 = load volatile float, float* undef + %val44 = load volatile float, ptr undef %cmp44 = fcmp une float %val44, 0.0 br i1 %cmp44, label %45, label %44 - %val45 = load volatile float, float* undef + %val45 = load volatile float, ptr undef %cmp45 = fcmp une float %val45, 0.0 br i1 %cmp45, label %46, label %45 - %val46 = load volatile float, float* undef + %val46 = load volatile float, ptr undef %cmp46 = fcmp une float %val46, 0.0 br i1 %cmp46, label %47, label %46 - %val47 = load volatile float, float* undef + %val47 = load volatile float, ptr undef %cmp47 = fcmp une float %val47, 0.0 br i1 %cmp47, label %48, label %47 - %val48 = load volatile float, float* undef + %val48 = load volatile float, ptr undef %cmp48 = fcmp une float %val48, 0.0 br i1 %cmp48, label %49, label %48 - %val49 = load volatile float, float* undef + %val49 = load volatile float, ptr undef %cmp49 = fcmp une float %val49, 0.0 br i1 %cmp49, label %50, label %49 - %val50 = load volatile float, float* undef + %val50 = load volatile float, ptr undef %cmp50 = fcmp une float %val50, 0.0 br i1 %cmp50, label %51, label %50 - %val51 = load volatile float, float* undef + %val51 = load volatile float, ptr undef %cmp51 = fcmp une float %val51, 0.0 br i1 %cmp51, label %52, label %51 - %val52 = load volatile float, float* undef + %val52 = load volatile float, ptr undef %cmp52 = fcmp une float %val52, 0.0 br i1 %cmp52, label %53, label %52 - %val53 = load volatile float, float* undef + %val53 = load volatile float, ptr undef %cmp53 = fcmp une float %val53, 0.0 br i1 %cmp53, label %54, label %53 - %val54 = load volatile float, float* undef + %val54 = load volatile float, ptr undef %cmp54 = fcmp une float %val54, 0.0 br i1 %cmp54, label %55, label %54 - %val55 = load volatile float, float* undef + %val55 = load volatile float, ptr undef %cmp55 = fcmp une float %val55, 0.0 br i1 %cmp55, label %56, label %55 - %val56 = load volatile float, float* undef + %val56 = load volatile float, ptr undef %cmp56 = fcmp une float %val56, 0.0 br i1 %cmp56, label %57, label %56 - %val57 = load volatile float, float* undef + %val57 = load volatile float, ptr undef %cmp57 = fcmp une float %val57, 0.0 br i1 %cmp57, label %58, label %57 - %val58 = load volatile float, float* undef + %val58 = load volatile float, ptr undef %cmp58 = fcmp une float %val58, 0.0 br i1 
%cmp58, label %59, label %58 - %val59 = load volatile float, float* undef + %val59 = load volatile float, ptr undef %cmp59 = fcmp une float %val59, 0.0 br i1 %cmp59, label %60, label %59 - %val60 = load volatile float, float* undef + %val60 = load volatile float, ptr undef %cmp60 = fcmp une float %val60, 0.0 br i1 %cmp60, label %61, label %60 - %val61 = load volatile float, float* undef + %val61 = load volatile float, ptr undef %cmp61 = fcmp une float %val61, 0.0 br i1 %cmp61, label %62, label %61 - %val62 = load volatile float, float* undef + %val62 = load volatile float, ptr undef %cmp62 = fcmp une float %val62, 0.0 br i1 %cmp62, label %63, label %62 - %val63 = load volatile float, float* undef + %val63 = load volatile float, ptr undef %cmp63 = fcmp une float %val63, 0.0 br i1 %cmp63, label %64, label %63 - %val64 = load volatile float, float* undef + %val64 = load volatile float, ptr undef %cmp64 = fcmp une float %val64, 0.0 br i1 %cmp64, label %65, label %64 @@ -938,7 +936,7 @@ exit: ret void } -define void @benchmark_heapsort(i32 %n, double* nocapture %ra) { +define void @benchmark_heapsort(i32 %n, ptr nocapture %ra) { ; This test case comes from the heapsort benchmark, and exemplifies several ; important aspects to block placement in the presence of loops: ; 1) Loop rotation needs to *ensure* that the desired exiting edge can be @@ -974,7 +972,7 @@ define void @benchmark_heapsort(i32 %n, double* nocapture %ra) { entry: %shr = ashr i32 %n, 1 %add = add nsw i32 %shr, 1 - %arrayidx3 = getelementptr inbounds double, double* %ra, i64 1 + %arrayidx3 = getelementptr inbounds double, ptr %ra, i64 1 br label %for.cond for.cond: @@ -986,22 +984,22 @@ for.cond: if.then: %dec = add nsw i32 %l.0, -1 %idxprom = sext i32 %dec to i64 - %arrayidx = getelementptr inbounds double, double* %ra, i64 %idxprom - %0 = load double, double* %arrayidx, align 8 + %arrayidx = getelementptr inbounds double, ptr %ra, i64 %idxprom + %0 = load double, ptr %arrayidx, align 8 br label %if.end10 if.else: %idxprom1 = sext i32 %ir.0 to i64 - %arrayidx2 = getelementptr inbounds double, double* %ra, i64 %idxprom1 - %1 = load double, double* %arrayidx2, align 8 - %2 = load double, double* %arrayidx3, align 8 - store double %2, double* %arrayidx2, align 8 + %arrayidx2 = getelementptr inbounds double, ptr %ra, i64 %idxprom1 + %1 = load double, ptr %arrayidx2, align 8 + %2 = load double, ptr %arrayidx3, align 8 + store double %2, ptr %arrayidx2, align 8 %dec6 = add nsw i32 %ir.0, -1 %cmp7 = icmp eq i32 %dec6, 1 br i1 %cmp7, label %if.then8, label %if.end10 if.then8: - store double %1, double* %arrayidx3, align 8 + store double %1, ptr %arrayidx3, align 8 ret void if.end10: @@ -1027,12 +1025,12 @@ while.body: land.lhs.true: %idxprom13 = sext i32 %j.0 to i64 - %arrayidx14 = getelementptr inbounds double, double* %ra, i64 %idxprom13 - %3 = load double, double* %arrayidx14, align 8 + %arrayidx14 = getelementptr inbounds double, ptr %ra, i64 %idxprom13 + %3 = load double, ptr %arrayidx14, align 8 %add15 = add nsw i32 %j.0, 1 %idxprom16 = sext i32 %add15 to i64 - %arrayidx17 = getelementptr inbounds double, double* %ra, i64 %idxprom16 - %4 = load double, double* %arrayidx17, align 8 + %arrayidx17 = getelementptr inbounds double, ptr %ra, i64 %idxprom16 + %4 = load double, ptr %arrayidx17, align 8 %cmp18 = fcmp olt double %3, %4 br i1 %cmp18, label %if.then19, label %if.end20 @@ -1042,27 +1040,27 @@ if.then19: if.end20: %j.1 = phi i32 [ %add15, %if.then19 ], [ %j.0, %land.lhs.true ], [ %j.0, %while.body ] %idxprom21 = sext i32 %j.1 
to i64 - %arrayidx22 = getelementptr inbounds double, double* %ra, i64 %idxprom21 - %5 = load double, double* %arrayidx22, align 8 + %arrayidx22 = getelementptr inbounds double, ptr %ra, i64 %idxprom21 + %5 = load double, ptr %arrayidx22, align 8 %cmp23 = fcmp olt double %rra.0, %5 br i1 %cmp23, label %if.then24, label %while.cond if.then24: %idxprom27 = sext i32 %j.0.ph.in to i64 - %arrayidx28 = getelementptr inbounds double, double* %ra, i64 %idxprom27 - store double %5, double* %arrayidx28, align 8 + %arrayidx28 = getelementptr inbounds double, ptr %ra, i64 %idxprom27 + store double %5, ptr %arrayidx28, align 8 br label %while.cond.outer while.end: %idxprom33 = sext i32 %j.0.ph.in to i64 - %arrayidx34 = getelementptr inbounds double, double* %ra, i64 %idxprom33 - store double %rra.0, double* %arrayidx34, align 8 + %arrayidx34 = getelementptr inbounds double, ptr %ra, i64 %idxprom33 + store double %rra.0, ptr %arrayidx34, align 8 br label %for.cond } declare void @cold_function() cold -define i32 @test_cold_calls(i32* %a) { +define i32 @test_cold_calls(ptr %a) { ; Test that edges to blocks post-dominated by cold calls are ; marked as not expected to be taken. They should be laid out ; at the bottom. @@ -1073,8 +1071,8 @@ define i32 @test_cold_calls(i32* %a) { ; CHECK: %then entry: - %gep1 = getelementptr i32, i32* %a, i32 1 - %val1 = load i32, i32* %gep1 + %gep1 = getelementptr i32, ptr %a, i32 1 + %val1 = load i32, ptr %gep1 %cond1 = icmp ugt i32 %val1, 1 br i1 %cond1, label %then, label %else @@ -1083,8 +1081,8 @@ then: br label %exit else: - %gep2 = getelementptr i32, i32* %a, i32 2 - %val2 = load i32, i32* %gep2 + %gep2 = getelementptr i32, ptr %a, i32 2 + %val2 = load i32, ptr %gep2 br label %exit exit: @@ -1125,12 +1123,12 @@ then: ret i32 %3 hotlp: - %4 = landingpad { i8*, i32 } + %4 = landingpad { ptr, i32 } cleanup br label %lpret coldlp: - %5 = landingpad { i8*, i32 } + %5 = landingpad { ptr, i32 } cleanup br label %lpret @@ -1166,19 +1164,19 @@ exit: ret void innerlp: - %2 = landingpad { i8*, i32 } + %2 = landingpad { ptr, i32 } cleanup br label %innercleanup outerlp: - %3 = landingpad { i8*, i32 } + %3 = landingpad { ptr, i32 } cleanup br label %outercleanup outercleanup: - %4 = phi { i8*, i32 } [%2, %innercleanup], [%3, %outerlp] + %4 = phi { ptr, i32 } [%2, %innercleanup], [%3, %outerlp] call void @clean() - resume { i8*, i32 } %4 + resume { ptr, i32 } %4 innercleanup: call void @clean() @@ -1187,7 +1185,7 @@ innercleanup: declare void @hot_function() -define void @test_hot_branch(i32* %a) { +define void @test_hot_branch(ptr %a) { ; Test that a hot branch that has a probability a little larger than 80% will ; break CFG constrains when doing block placement. ; CHECK-LABEL: test_hot_branch: @@ -1197,8 +1195,8 @@ define void @test_hot_branch(i32* %a) { ; CHECK: %else entry: - %gep1 = getelementptr i32, i32* %a, i32 1 - %val1 = load i32, i32* %gep1 + %gep1 = getelementptr i32, ptr %a, i32 1 + %val1 = load i32, ptr %gep1 %cond1 = icmp ugt i32 %val1, 1 br i1 %cond1, label %then, label %else, !prof !5 @@ -1215,7 +1213,7 @@ exit: ret void } -define void @test_hot_branch_profile(i32* %a) !prof !6 { +define void @test_hot_branch_profile(ptr %a) !prof !6 { ; Test that a hot branch that has a probability a little larger than 50% will ; break CFG constrains when doing block placement when profile is available. 
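; Editorial note, not part of the original test: the difference from
; test_hot_branch above is only that real profile data is attached (a
; function-level !prof entry on the define plus branch_weights on the
; branch), which lowers the threshold for breaking the CFG layout constraint
; from roughly 80% to just over 50%. With hypothetical weights, the taken
; probability is computed the same way as before, e.g.
;   !{!"branch_weights", i32 51, i32 49}   ; 51 / (51 + 49) = 51% taken
; The actual !6 and !7 definitions live outside this excerpt.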
; CHECK-LABEL: test_hot_branch_profile: @@ -1225,8 +1223,8 @@ define void @test_hot_branch_profile(i32* %a) !prof !6 { ; CHECK: %else entry: - %gep1 = getelementptr i32, i32* %a, i32 1 - %val1 = load i32, i32* %gep1 + %gep1 = getelementptr i32, ptr %a, i32 1 + %val1 = load i32, ptr %gep1 %cond1 = icmp ugt i32 %val1, 1 br i1 %cond1, label %then, label %else, !prof !7 @@ -1243,7 +1241,7 @@ exit: ret void } -define void @test_hot_branch_triangle_profile(i32* %a) !prof !6 { +define void @test_hot_branch_triangle_profile(ptr %a) !prof !6 { ; Test that a hot branch that has a probability a little larger than 80% will ; break triangle shaped CFG constrains when doing block placement if profile ; is present. @@ -1253,8 +1251,8 @@ define void @test_hot_branch_triangle_profile(i32* %a) !prof !6 { ; CHECK: %then entry: - %gep1 = getelementptr i32, i32* %a, i32 1 - %val1 = load i32, i32* %gep1 + %gep1 = getelementptr i32, ptr %a, i32 1 + %val1 = load i32, ptr %gep1 %cond1 = icmp ugt i32 %val1, 1 br i1 %cond1, label %exit, label %then, !prof !5 @@ -1267,7 +1265,7 @@ exit: ret void } -define void @test_hot_branch_triangle_profile_topology(i32* %a) !prof !6 { +define void @test_hot_branch_triangle_profile_topology(ptr %a) !prof !6 { ; Test that a hot branch that has a probability between 50% and 66% will not ; break triangle shaped CFG constrains when doing block placement if profile ; is present. @@ -1277,8 +1275,8 @@ define void @test_hot_branch_triangle_profile_topology(i32* %a) !prof !6 { ; CHECK: %exit entry: - %gep1 = getelementptr i32, i32* %a, i32 1 - %val1 = load i32, i32* %gep1 + %gep1 = getelementptr i32, ptr %a, i32 1 + %val1 = load i32, ptr %gep1 %cond1 = icmp ugt i32 %val1, 1 br i1 %cond1, label %exit, label %then, !prof !7 @@ -1294,7 +1292,7 @@ exit: declare void @a() declare void @b() -define void @test_forked_hot_diamond(i32* %a) { +define void @test_forked_hot_diamond(ptr %a) { ; Test that a hot-branch with probability > 80% followed by a 50/50 branch ; will not place the cold predecessor if the probability for the fallthrough ; remains above 80% @@ -1306,22 +1304,22 @@ define void @test_forked_hot_diamond(i32* %a) { ; CHECK: %fork2 ; CHECK: %exit entry: - %gep1 = getelementptr i32, i32* %a, i32 1 - %val1 = load i32, i32* %gep1 + %gep1 = getelementptr i32, ptr %a, i32 1 + %val1 = load i32, ptr %gep1 %cond1 = icmp ugt i32 %val1, 1 br i1 %cond1, label %then, label %else, !prof !5 then: call void @hot_function() - %gep2 = getelementptr i32, i32* %a, i32 2 - %val2 = load i32, i32* %gep2 + %gep2 = getelementptr i32, ptr %a, i32 2 + %val2 = load i32, ptr %gep2 %cond2 = icmp ugt i32 %val2, 2 br i1 %cond2, label %fork1, label %fork2, !prof !8 else: call void @cold_function() - %gep3 = getelementptr i32, i32* %a, i32 3 - %val3 = load i32, i32* %gep3 + %gep3 = getelementptr i32, ptr %a, i32 3 + %val3 = load i32, ptr %gep3 %cond3 = icmp ugt i32 %val3, 3 br i1 %cond3, label %fork1, label %fork2, !prof !8 @@ -1338,7 +1336,7 @@ exit: ret void } -define void @test_forked_hot_diamond_gets_cold(i32* %a) { +define void @test_forked_hot_diamond_gets_cold(ptr %a) { ; Test that a hot-branch with probability > 80% followed by a 50/50 branch ; will place the cold predecessor if the probability for the fallthrough ; falls below 80% @@ -1359,15 +1357,15 @@ define void @test_forked_hot_diamond_gets_cold(i32* %a) { ; CHECK: %fork2 ; CHECK: %exit entry: - %gep1 = getelementptr i32, i32* %a, i32 1 - %val1 = load i32, i32* %gep1 + %gep1 = getelementptr i32, ptr %a, i32 1 + %val1 = load i32, ptr %gep1 %cond1 = icmp 
ugt i32 %val1, 1 br i1 %cond1, label %then1, label %else1, !prof !9 then1: call void @hot_function() - %gep2 = getelementptr i32, i32* %a, i32 2 - %val2 = load i32, i32* %gep2 + %gep2 = getelementptr i32, ptr %a, i32 2 + %val2 = load i32, ptr %gep2 %cond2 = icmp ugt i32 %val2, 2 br i1 %cond2, label %then2, label %else2, !prof !9 @@ -1377,8 +1375,8 @@ else1: then2: call void @hot_function() - %gep3 = getelementptr i32, i32* %a, i32 3 - %val3 = load i32, i32* %gep2 + %gep3 = getelementptr i32, ptr %a, i32 3 + %val3 = load i32, ptr %gep2 %cond3 = icmp ugt i32 %val2, 3 br i1 %cond3, label %fork1, label %fork2, !prof !8 @@ -1399,7 +1397,7 @@ exit: ret void } -define void @test_forked_hot_diamond_stays_hot(i32* %a) { +define void @test_forked_hot_diamond_stays_hot(ptr %a) { ; Test that a hot-branch with probability > 88.88% (1:8) followed by a 50/50 ; branch will not place the cold predecessor as the probability for the ; fallthrough stays above 80% @@ -1416,15 +1414,15 @@ define void @test_forked_hot_diamond_stays_hot(i32* %a) { ; CHECK: %fork2 ; CHECK: %exit entry: - %gep1 = getelementptr i32, i32* %a, i32 1 - %val1 = load i32, i32* %gep1 + %gep1 = getelementptr i32, ptr %a, i32 1 + %val1 = load i32, ptr %gep1 %cond1 = icmp ugt i32 %val1, 1 br i1 %cond1, label %then1, label %else1, !prof !10 then1: call void @hot_function() - %gep2 = getelementptr i32, i32* %a, i32 2 - %val2 = load i32, i32* %gep2 + %gep2 = getelementptr i32, ptr %a, i32 2 + %val2 = load i32, ptr %gep2 %cond2 = icmp ugt i32 %val2, 2 br i1 %cond2, label %then2, label %else2, !prof !10 @@ -1434,8 +1432,8 @@ else1: then2: call void @hot_function() - %gep3 = getelementptr i32, i32* %a, i32 3 - %val3 = load i32, i32* %gep2 + %gep3 = getelementptr i32, ptr %a, i32 3 + %val3 = load i32, ptr %gep2 %cond3 = icmp ugt i32 %val2, 3 br i1 %cond3, label %fork1, label %fork2, !prof !8 diff --git a/llvm/test/CodeGen/X86/lsr-loop-exit-cond.ll b/llvm/test/CodeGen/X86/lsr-loop-exit-cond.ll index e3c108d7a5843..8f7cbbfb08df0 100644 --- a/llvm/test/CodeGen/X86/lsr-loop-exit-cond.ll +++ b/llvm/test/CodeGen/X86/lsr-loop-exit-cond.ll @@ -1,14 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; ifndef INTEL_SYCL_OPAQUEPOINTER_READY ; RUN: llc < %s -mtriple=x86_64-darwin | FileCheck %s --check-prefix=GENERIC ; RUN: llc < %s -mtriple=x86_64-darwin -mcpu=atom | FileCheck %s --check-prefix=ATOM -; end -@Te0 = external global [256 x i32] ; <[256 x i32]*> [#uses=5] -@Te1 = external global [256 x i32] ; <[256 x i32]*> [#uses=4] -@Te3 = external global [256 x i32] ; <[256 x i32]*> [#uses=2] +@Te0 = external global [256 x i32] ; [#uses=5] +@Te1 = external global [256 x i32] ; [#uses=4] +@Te3 = external global [256 x i32] ; [#uses=2] -define void @t(i8* nocapture %in, i8* nocapture %out, i32* nocapture %rk, i32 %r) nounwind { +define void @t(ptr nocapture %in, ptr nocapture %out, ptr nocapture %rk, i32 %r) nounwind { ; GENERIC-LABEL: t: ; GENERIC: ## %bb.0: ## %entry ; GENERIC-NEXT: pushq %rbp @@ -177,9 +175,9 @@ define void @t(i8* nocapture %in, i8* nocapture %out, i32* nocapture %rk, i32 %r ; ATOM-NEXT: popq %rbp ; ATOM-NEXT: retq entry: - %0 = load i32, i32* %rk, align 4 ; [#uses=1] - %1 = getelementptr i32, i32* %rk, i64 1 ; [#uses=1] - %2 = load i32, i32* %1, align 4 ; [#uses=1] + %0 = load i32, ptr %rk, align 4 ; [#uses=1] + %1 = getelementptr i32, ptr %rk, i64 1 ; [#uses=1] + %2 = load i32, ptr %1, align 4 ; [#uses=1] %tmp15 = add i32 %r, -1 ; [#uses=1] %tmp.16 = zext i32 %tmp15 to i64 ; [#uses=2] br label %bb @@ 
-189,67 +187,67 @@ bb: ; preds = %bb1, %entry %s1.0 = phi i32 [ %2, %entry ], [ %56, %bb1 ] ; [#uses=2] %s0.0 = phi i32 [ %0, %entry ], [ %43, %bb1 ] ; [#uses=2] %tmp18 = shl i64 %indvar, 4 ; [#uses=4] - %rk26 = bitcast i32* %rk to i8* ; [#uses=6] + %rk26 = bitcast ptr %rk to ptr ; [#uses=6] %3 = lshr i32 %s0.0, 24 ; [#uses=1] %4 = zext i32 %3 to i64 ; [#uses=1] - %5 = getelementptr [256 x i32], [256 x i32]* @Te0, i64 0, i64 %4 ; [#uses=1] - %6 = load i32, i32* %5, align 4 ; [#uses=1] + %5 = getelementptr [256 x i32], ptr @Te0, i64 0, i64 %4 ; [#uses=1] + %6 = load i32, ptr %5, align 4 ; [#uses=1] %7 = lshr i32 %s1.0, 16 ; [#uses=1] %8 = and i32 %7, 255 ; [#uses=1] %9 = zext i32 %8 to i64 ; [#uses=1] - %10 = getelementptr [256 x i32], [256 x i32]* @Te1, i64 0, i64 %9 ; [#uses=1] - %11 = load i32, i32* %10, align 4 ; [#uses=1] + %10 = getelementptr [256 x i32], ptr @Te1, i64 0, i64 %9 ; [#uses=1] + %11 = load i32, ptr %10, align 4 ; [#uses=1] %ctg2.sum2728 = or i64 %tmp18, 8 ; [#uses=1] - %12 = getelementptr i8, i8* %rk26, i64 %ctg2.sum2728 ; [#uses=1] - %13 = bitcast i8* %12 to i32* ; [#uses=1] - %14 = load i32, i32* %13, align 4 ; [#uses=1] + %12 = getelementptr i8, ptr %rk26, i64 %ctg2.sum2728 ; [#uses=1] + %13 = bitcast ptr %12 to ptr ; [#uses=1] + %14 = load i32, ptr %13, align 4 ; [#uses=1] %15 = xor i32 %11, %6 ; [#uses=1] %16 = xor i32 %15, %14 ; [#uses=3] %17 = lshr i32 %s1.0, 24 ; [#uses=1] %18 = zext i32 %17 to i64 ; [#uses=1] - %19 = getelementptr [256 x i32], [256 x i32]* @Te0, i64 0, i64 %18 ; [#uses=1] - %20 = load i32, i32* %19, align 4 ; [#uses=1] + %19 = getelementptr [256 x i32], ptr @Te0, i64 0, i64 %18 ; [#uses=1] + %20 = load i32, ptr %19, align 4 ; [#uses=1] %21 = and i32 %s0.0, 255 ; [#uses=1] %22 = zext i32 %21 to i64 ; [#uses=1] - %23 = getelementptr [256 x i32], [256 x i32]* @Te3, i64 0, i64 %22 ; [#uses=1] - %24 = load i32, i32* %23, align 4 ; [#uses=1] + %23 = getelementptr [256 x i32], ptr @Te3, i64 0, i64 %22 ; [#uses=1] + %24 = load i32, ptr %23, align 4 ; [#uses=1] %ctg2.sum2930 = or i64 %tmp18, 12 ; [#uses=1] - %25 = getelementptr i8, i8* %rk26, i64 %ctg2.sum2930 ; [#uses=1] - %26 = bitcast i8* %25 to i32* ; [#uses=1] - %27 = load i32, i32* %26, align 4 ; [#uses=1] + %25 = getelementptr i8, ptr %rk26, i64 %ctg2.sum2930 ; [#uses=1] + %26 = bitcast ptr %25 to ptr ; [#uses=1] + %27 = load i32, ptr %26, align 4 ; [#uses=1] %28 = xor i32 %24, %20 ; [#uses=1] %29 = xor i32 %28, %27 ; [#uses=4] %30 = lshr i32 %16, 24 ; [#uses=1] %31 = zext i32 %30 to i64 ; [#uses=1] - %32 = getelementptr [256 x i32], [256 x i32]* @Te0, i64 0, i64 %31 ; [#uses=1] - %33 = load i32, i32* %32, align 4 ; [#uses=2] + %32 = getelementptr [256 x i32], ptr @Te0, i64 0, i64 %31 ; [#uses=1] + %33 = load i32, ptr %32, align 4 ; [#uses=2] %exitcond = icmp eq i64 %indvar, %tmp.16 ; [#uses=1] br i1 %exitcond, label %bb2, label %bb1 bb1: ; preds = %bb %ctg2.sum31 = add i64 %tmp18, 16 ; [#uses=1] - %34 = getelementptr i8, i8* %rk26, i64 %ctg2.sum31 ; [#uses=1] - %35 = bitcast i8* %34 to i32* ; [#uses=1] + %34 = getelementptr i8, ptr %rk26, i64 %ctg2.sum31 ; [#uses=1] + %35 = bitcast ptr %34 to ptr ; [#uses=1] %36 = lshr i32 %29, 16 ; [#uses=1] %37 = and i32 %36, 255 ; [#uses=1] %38 = zext i32 %37 to i64 ; [#uses=1] - %39 = getelementptr [256 x i32], [256 x i32]* @Te1, i64 0, i64 %38 ; [#uses=1] - %40 = load i32, i32* %39, align 4 ; [#uses=1] - %41 = load i32, i32* %35, align 4 ; [#uses=1] + %39 = getelementptr [256 x i32], ptr @Te1, i64 0, i64 %38 ; [#uses=1] + %40 = load i32, ptr %39, align 4 ; 
[#uses=1] + %41 = load i32, ptr %35, align 4 ; [#uses=1] %42 = xor i32 %40, %33 ; [#uses=1] %43 = xor i32 %42, %41 ; [#uses=1] %44 = lshr i32 %29, 24 ; [#uses=1] %45 = zext i32 %44 to i64 ; [#uses=1] - %46 = getelementptr [256 x i32], [256 x i32]* @Te0, i64 0, i64 %45 ; [#uses=1] - %47 = load i32, i32* %46, align 4 ; [#uses=1] + %46 = getelementptr [256 x i32], ptr @Te0, i64 0, i64 %45 ; [#uses=1] + %47 = load i32, ptr %46, align 4 ; [#uses=1] %48 = and i32 %16, 255 ; [#uses=1] %49 = zext i32 %48 to i64 ; [#uses=1] - %50 = getelementptr [256 x i32], [256 x i32]* @Te3, i64 0, i64 %49 ; [#uses=1] - %51 = load i32, i32* %50, align 4 ; [#uses=1] + %50 = getelementptr [256 x i32], ptr @Te3, i64 0, i64 %49 ; [#uses=1] + %51 = load i32, ptr %50, align 4 ; [#uses=1] %ctg2.sum32 = add i64 %tmp18, 20 ; [#uses=1] - %52 = getelementptr i8, i8* %rk26, i64 %ctg2.sum32 ; [#uses=1] - %53 = bitcast i8* %52 to i32* ; [#uses=1] - %54 = load i32, i32* %53, align 4 ; [#uses=1] + %52 = getelementptr i8, ptr %rk26, i64 %ctg2.sum32 ; [#uses=1] + %53 = bitcast ptr %52 to ptr ; [#uses=1] + %54 = load i32, ptr %53, align 4 ; [#uses=1] %55 = xor i32 %51, %47 ; [#uses=1] %56 = xor i32 %55, %54 ; [#uses=1] %indvar.next = add i64 %indvar, 1 ; [#uses=1] @@ -258,49 +256,49 @@ bb1: ; preds = %bb bb2: ; preds = %bb %tmp10 = shl i64 %tmp.16, 4 ; [#uses=2] %ctg2.sum = add i64 %tmp10, 16 ; [#uses=1] - %tmp1213 = getelementptr i8, i8* %rk26, i64 %ctg2.sum ; [#uses=1] - %57 = bitcast i8* %tmp1213 to i32* ; [#uses=1] + %tmp1213 = getelementptr i8, ptr %rk26, i64 %ctg2.sum ; [#uses=1] + %57 = bitcast ptr %tmp1213 to ptr ; [#uses=1] %58 = and i32 %33, -16777216 ; [#uses=1] %59 = lshr i32 %29, 16 ; [#uses=1] %60 = and i32 %59, 255 ; [#uses=1] %61 = zext i32 %60 to i64 ; [#uses=1] - %62 = getelementptr [256 x i32], [256 x i32]* @Te1, i64 0, i64 %61 ; [#uses=1] - %63 = load i32, i32* %62, align 4 ; [#uses=1] + %62 = getelementptr [256 x i32], ptr @Te1, i64 0, i64 %61 ; [#uses=1] + %63 = load i32, ptr %62, align 4 ; [#uses=1] %64 = and i32 %63, 16711680 ; [#uses=1] %65 = or i32 %64, %58 ; [#uses=1] - %66 = load i32, i32* %57, align 4 ; [#uses=1] + %66 = load i32, ptr %57, align 4 ; [#uses=1] %67 = xor i32 %65, %66 ; [#uses=2] %68 = lshr i32 %29, 8 ; [#uses=1] %69 = zext i32 %68 to i64 ; [#uses=1] - %70 = getelementptr [256 x i32], [256 x i32]* @Te0, i64 0, i64 %69 ; [#uses=1] - %71 = load i32, i32* %70, align 4 ; [#uses=1] + %70 = getelementptr [256 x i32], ptr @Te0, i64 0, i64 %69 ; [#uses=1] + %71 = load i32, ptr %70, align 4 ; [#uses=1] %72 = and i32 %71, -16777216 ; [#uses=1] %73 = and i32 %16, 255 ; [#uses=1] %74 = zext i32 %73 to i64 ; [#uses=1] - %75 = getelementptr [256 x i32], [256 x i32]* @Te1, i64 0, i64 %74 ; [#uses=1] - %76 = load i32, i32* %75, align 4 ; [#uses=1] + %75 = getelementptr [256 x i32], ptr @Te1, i64 0, i64 %74 ; [#uses=1] + %76 = load i32, ptr %75, align 4 ; [#uses=1] %77 = and i32 %76, 16711680 ; [#uses=1] %78 = or i32 %77, %72 ; [#uses=1] %ctg2.sum25 = add i64 %tmp10, 20 ; [#uses=1] - %79 = getelementptr i8, i8* %rk26, i64 %ctg2.sum25 ; [#uses=1] - %80 = bitcast i8* %79 to i32* ; [#uses=1] - %81 = load i32, i32* %80, align 4 ; [#uses=1] + %79 = getelementptr i8, ptr %rk26, i64 %ctg2.sum25 ; [#uses=1] + %80 = bitcast ptr %79 to ptr ; [#uses=1] + %81 = load i32, ptr %80, align 4 ; [#uses=1] %82 = xor i32 %78, %81 ; [#uses=2] %83 = lshr i32 %67, 24 ; [#uses=1] %84 = trunc i32 %83 to i8 ; [#uses=1] - store i8 %84, i8* %out, align 1 + store i8 %84, ptr %out, align 1 %85 = lshr i32 %67, 16 ; [#uses=1] %86 = trunc 
i32 %85 to i8 ; [#uses=1] - %87 = getelementptr i8, i8* %out, i64 1 ; [#uses=1] - store i8 %86, i8* %87, align 1 - %88 = getelementptr i8, i8* %out, i64 4 ; [#uses=1] + %87 = getelementptr i8, ptr %out, i64 1 ; [#uses=1] + store i8 %86, ptr %87, align 1 + %88 = getelementptr i8, ptr %out, i64 4 ; [#uses=1] %89 = lshr i32 %82, 24 ; [#uses=1] %90 = trunc i32 %89 to i8 ; [#uses=1] - store i8 %90, i8* %88, align 1 + store i8 %90, ptr %88, align 1 %91 = lshr i32 %82, 16 ; [#uses=1] %92 = trunc i32 %91 to i8 ; [#uses=1] - %93 = getelementptr i8, i8* %out, i64 5 ; [#uses=1] - store i8 %92, i8* %93, align 1 + %93 = getelementptr i8, ptr %out, i64 5 ; [#uses=1] + store i8 %92, ptr %93, align 1 ret void } @@ -308,7 +306,7 @@ bb2: ; preds = %bb ; is equal to the stride. ; It must not fold (cmp (add iv, 1), 1) --> (cmp iv, 0). -define i32 @f(i32 %i, i32* nocapture %a) nounwind uwtable readonly ssp { +define i32 @f(i32 %i, ptr nocapture %a) nounwind uwtable readonly ssp { ; GENERIC-LABEL: f: ; GENERIC: ## %bb.0: ## %entry ; GENERIC-NEXT: xorl %eax, %eax @@ -370,8 +368,8 @@ for.body: ; preds = %for.body.lr.ph, %fo %indvars.iv = phi i64 [ %0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ] %bi.06 = phi i32 [ 0, %for.body.lr.ph ], [ %i.addr.0.bi.0, %for.body ] %b.05 = phi i32 [ 0, %for.body.lr.ph ], [ %.b.0, %for.body ] - %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv - %1 = load i32, i32* %arrayidx, align 4 + %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv + %1 = load i32, ptr %arrayidx, align 4 %cmp1 = icmp ugt i32 %1, %b.05 %.b.0 = select i1 %cmp1, i32 %1, i32 %b.05 %2 = trunc i64 %indvars.iv to i32 diff --git a/llvm/test/CodeGen/X86/min-legal-vector-width.ll b/llvm/test/CodeGen/X86/min-legal-vector-width.ll index 801e395a86d78..177ae2aa40578 100644 --- a/llvm/test/CodeGen/X86/min-legal-vector-width.ll +++ b/llvm/test/CodeGen/X86/min-legal-vector-width.ll @@ -1,5 +1,4 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; ifndef INTEL_SYCL_OPAQUEPOINTER_READY ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skylake-avx512 -mattr=prefer-256-bit | FileCheck %s --check-prefixes=CHECK,CHECK-SKX ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skylake-avx512 -mattr=prefer-256-bit,avx512vbmi | FileCheck %s --check-prefixes=CHECK,CHECK-SKX,CHECK-SKX-VBMI ; Make sure CPUs default to prefer-256-bit. avx512vnni isn't interesting as it just adds an isel peephole for vpmaddwd+vpaddd @@ -10,11 +9,10 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=-avx512vnni -mcpu=icelake-client | FileCheck %s --check-prefixes=CHECK,CHECK-VBMI ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=-avx512vnni -mcpu=icelake-server | FileCheck %s --check-prefixes=CHECK,CHECK-VBMI ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=-avx512vnni -mcpu=tigerlake | FileCheck %s --check-prefixes=CHECK,CHECK-VBMI -; end ; This file primarily contains tests for specific places in X86ISelLowering.cpp that needed be made aware of the legalizer not allowing 512-bit vectors due to prefer-256-bit even though AVX512 is enabled. 
-define dso_local void @add256(<16 x i32>* %a, <16 x i32>* %b, <16 x i32>* %c) "min-legal-vector-width"="256" { +define dso_local void @add256(ptr %a, ptr %b, ptr %c) "min-legal-vector-width"="256" { ; CHECK-LABEL: add256: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovdqa (%rdi), %ymm0 @@ -25,14 +23,14 @@ define dso_local void @add256(<16 x i32>* %a, <16 x i32>* %b, <16 x i32>* %c) "m ; CHECK-NEXT: vmovdqa %ymm1, 32(%rdx) ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq - %d = load <16 x i32>, <16 x i32>* %a - %e = load <16 x i32>, <16 x i32>* %b + %d = load <16 x i32>, ptr %a + %e = load <16 x i32>, ptr %b %f = add <16 x i32> %d, %e - store <16 x i32> %f, <16 x i32>* %c + store <16 x i32> %f, ptr %c ret void } -define dso_local void @add512(<16 x i32>* %a, <16 x i32>* %b, <16 x i32>* %c) "min-legal-vector-width"="512" { +define dso_local void @add512(ptr %a, ptr %b, ptr %c) "min-legal-vector-width"="512" { ; CHECK-LABEL: add512: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovdqa64 (%rdi), %zmm0 @@ -40,14 +38,14 @@ define dso_local void @add512(<16 x i32>* %a, <16 x i32>* %b, <16 x i32>* %c) "m ; CHECK-NEXT: vmovdqa64 %zmm0, (%rdx) ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq - %d = load <16 x i32>, <16 x i32>* %a - %e = load <16 x i32>, <16 x i32>* %b + %d = load <16 x i32>, ptr %a + %e = load <16 x i32>, ptr %b %f = add <16 x i32> %d, %e - store <16 x i32> %f, <16 x i32>* %c + store <16 x i32> %f, ptr %c ret void } -define dso_local void @avg_v64i8_256(<64 x i8>* %a, <64 x i8>* %b) "min-legal-vector-width"="256" { +define dso_local void @avg_v64i8_256(ptr %a, ptr %b) "min-legal-vector-width"="256" { ; CHECK-LABEL: avg_v64i8_256: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovdqa (%rsi), %ymm0 @@ -58,20 +56,20 @@ define dso_local void @avg_v64i8_256(<64 x i8>* %a, <64 x i8>* %b) "min-legal-ve ; CHECK-NEXT: vmovdqu %ymm0, (%rax) ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq - %1 = load <64 x i8>, <64 x i8>* %a - %2 = load <64 x i8>, <64 x i8>* %b + %1 = load <64 x i8>, ptr %a + %2 = load <64 x i8>, ptr %b %3 = zext <64 x i8> %1 to <64 x i32> %4 = zext <64 x i8> %2 to <64 x i32> %5 = add nuw nsw <64 x i32> %3, %6 = add nuw nsw <64 x i32> %5, %4 %7 = lshr <64 x i32> %6, %8 = trunc <64 x i32> %7 to <64 x i8> - store <64 x i8> %8, <64 x i8>* undef, align 4 + store <64 x i8> %8, ptr undef, align 4 ret void } -define dso_local void @avg_v64i8_512(<64 x i8>* %a, <64 x i8>* %b) "min-legal-vector-width"="512" { +define dso_local void @avg_v64i8_512(ptr %a, ptr %b) "min-legal-vector-width"="512" { ; CHECK-LABEL: avg_v64i8_512: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovdqa64 (%rdi), %zmm0 @@ -79,19 +77,19 @@ define dso_local void @avg_v64i8_512(<64 x i8>* %a, <64 x i8>* %b) "min-legal-ve ; CHECK-NEXT: vmovdqu64 %zmm0, (%rax) ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq - %1 = load <64 x i8>, <64 x i8>* %a - %2 = load <64 x i8>, <64 x i8>* %b + %1 = load <64 x i8>, ptr %a + %2 = load <64 x i8>, ptr %b %3 = zext <64 x i8> %1 to <64 x i32> %4 = zext <64 x i8> %2 to <64 x i32> %5 = add nuw nsw <64 x i32> %3, %6 = add nuw nsw <64 x i32> %5, %4 %7 = lshr <64 x i32> %6, %8 = trunc <64 x i32> %7 to <64 x i8> - store <64 x i8> %8, <64 x i8>* undef, align 4 + store <64 x i8> %8, ptr undef, align 4 ret void } -define dso_local void @pmaddwd_32_256(<32 x i16>* %APtr, <32 x i16>* %BPtr, <16 x i32>* %CPtr) "min-legal-vector-width"="256" { +define dso_local void @pmaddwd_32_256(ptr %APtr, ptr %BPtr, ptr %CPtr) "min-legal-vector-width"="256" { ; CHECK-LABEL: pmaddwd_32_256: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovdqa (%rdi), %ymm0 @@ -102,19 +100,19 @@ define 
dso_local void @pmaddwd_32_256(<32 x i16>* %APtr, <32 x i16>* %BPtr, <16 ; CHECK-NEXT: vmovdqa %ymm1, 32(%rdx) ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq - %A = load <32 x i16>, <32 x i16>* %APtr - %B = load <32 x i16>, <32 x i16>* %BPtr + %A = load <32 x i16>, ptr %APtr + %B = load <32 x i16>, ptr %BPtr %a = sext <32 x i16> %A to <32 x i32> %b = sext <32 x i16> %B to <32 x i32> %m = mul nsw <32 x i32> %a, %b %odd = shufflevector <32 x i32> %m, <32 x i32> undef, <16 x i32> %even = shufflevector <32 x i32> %m, <32 x i32> undef, <16 x i32> %ret = add <16 x i32> %odd, %even - store <16 x i32> %ret, <16 x i32>* %CPtr + store <16 x i32> %ret, ptr %CPtr ret void } -define dso_local void @pmaddwd_32_512(<32 x i16>* %APtr, <32 x i16>* %BPtr, <16 x i32>* %CPtr) "min-legal-vector-width"="512" { +define dso_local void @pmaddwd_32_512(ptr %APtr, ptr %BPtr, ptr %CPtr) "min-legal-vector-width"="512" { ; CHECK-LABEL: pmaddwd_32_512: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovdqa64 (%rdi), %zmm0 @@ -122,19 +120,19 @@ define dso_local void @pmaddwd_32_512(<32 x i16>* %APtr, <32 x i16>* %BPtr, <16 ; CHECK-NEXT: vmovdqa64 %zmm0, (%rdx) ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq - %A = load <32 x i16>, <32 x i16>* %APtr - %B = load <32 x i16>, <32 x i16>* %BPtr + %A = load <32 x i16>, ptr %APtr + %B = load <32 x i16>, ptr %BPtr %a = sext <32 x i16> %A to <32 x i32> %b = sext <32 x i16> %B to <32 x i32> %m = mul nsw <32 x i32> %a, %b %odd = shufflevector <32 x i32> %m, <32 x i32> undef, <16 x i32> %even = shufflevector <32 x i32> %m, <32 x i32> undef, <16 x i32> %ret = add <16 x i32> %odd, %even - store <16 x i32> %ret, <16 x i32>* %CPtr + store <16 x i32> %ret, ptr %CPtr ret void } -define dso_local void @psubus_64i8_max_256(<64 x i8>* %xptr, <64 x i8>* %yptr, <64 x i8>* %zptr) "min-legal-vector-width"="256" { +define dso_local void @psubus_64i8_max_256(ptr %xptr, ptr %yptr, ptr %zptr) "min-legal-vector-width"="256" { ; CHECK-LABEL: psubus_64i8_max_256: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovdqa (%rdi), %ymm0 @@ -145,16 +143,16 @@ define dso_local void @psubus_64i8_max_256(<64 x i8>* %xptr, <64 x i8>* %yptr, < ; CHECK-NEXT: vmovdqa %ymm1, 32(%rdx) ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq - %x = load <64 x i8>, <64 x i8>* %xptr - %y = load <64 x i8>, <64 x i8>* %yptr + %x = load <64 x i8>, ptr %xptr + %y = load <64 x i8>, ptr %yptr %cmp = icmp ult <64 x i8> %x, %y %max = select <64 x i1> %cmp, <64 x i8> %y, <64 x i8> %x %res = sub <64 x i8> %max, %y - store <64 x i8> %res, <64 x i8>* %zptr + store <64 x i8> %res, ptr %zptr ret void } -define dso_local void @psubus_64i8_max_512(<64 x i8>* %xptr, <64 x i8>* %yptr, <64 x i8>* %zptr) "min-legal-vector-width"="512" { +define dso_local void @psubus_64i8_max_512(ptr %xptr, ptr %yptr, ptr %zptr) "min-legal-vector-width"="512" { ; CHECK-LABEL: psubus_64i8_max_512: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovdqa64 (%rdi), %zmm0 @@ -162,16 +160,16 @@ define dso_local void @psubus_64i8_max_512(<64 x i8>* %xptr, <64 x i8>* %yptr, < ; CHECK-NEXT: vmovdqa64 %zmm0, (%rdx) ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq - %x = load <64 x i8>, <64 x i8>* %xptr - %y = load <64 x i8>, <64 x i8>* %yptr + %x = load <64 x i8>, ptr %xptr + %y = load <64 x i8>, ptr %yptr %cmp = icmp ult <64 x i8> %x, %y %max = select <64 x i1> %cmp, <64 x i8> %y, <64 x i8> %x %res = sub <64 x i8> %max, %y - store <64 x i8> %res, <64 x i8>* %zptr + store <64 x i8> %res, ptr %zptr ret void } -define dso_local i32 @_Z9test_charPcS_i_256(i8* nocapture readonly, i8* nocapture readonly, i32) "min-legal-vector-width"="256" { 
+define dso_local i32 @_Z9test_charPcS_i_256(ptr nocapture readonly, ptr nocapture readonly, i32) "min-legal-vector-width"="256" { ; CHECK-SKX-LABEL: _Z9test_charPcS_i_256: ; CHECK-SKX: # %bb.0: # %entry ; CHECK-SKX-NEXT: movl %edx, %eax @@ -283,13 +281,13 @@ entry: vector.body: %index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ] %vec.phi = phi <32 x i32> [ %11, %vector.body ], [ zeroinitializer, %entry ] - %4 = getelementptr inbounds i8, i8* %0, i64 %index - %5 = bitcast i8* %4 to <32 x i8>* - %wide.load = load <32 x i8>, <32 x i8>* %5, align 1 + %4 = getelementptr inbounds i8, ptr %0, i64 %index + %5 = bitcast ptr %4 to ptr + %wide.load = load <32 x i8>, ptr %5, align 1 %6 = sext <32 x i8> %wide.load to <32 x i32> - %7 = getelementptr inbounds i8, i8* %1, i64 %index - %8 = bitcast i8* %7 to <32 x i8>* - %wide.load14 = load <32 x i8>, <32 x i8>* %8, align 1 + %7 = getelementptr inbounds i8, ptr %1, i64 %index + %8 = bitcast ptr %7 to ptr + %wide.load14 = load <32 x i8>, ptr %8, align 1 %9 = sext <32 x i8> %wide.load14 to <32 x i32> %10 = mul nsw <32 x i32> %9, %6 %11 = add nsw <32 x i32> %10, %vec.phi @@ -312,7 +310,7 @@ middle.block: ret i32 %13 } -define dso_local i32 @_Z9test_charPcS_i_512(i8* nocapture readonly, i8* nocapture readonly, i32) "min-legal-vector-width"="512" { +define dso_local i32 @_Z9test_charPcS_i_512(ptr nocapture readonly, ptr nocapture readonly, i32) "min-legal-vector-width"="512" { ; CHECK-SKX-LABEL: _Z9test_charPcS_i_512: ; CHECK-SKX: # %bb.0: # %entry ; CHECK-SKX-NEXT: movl %edx, %eax @@ -409,13 +407,13 @@ entry: vector.body: %index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ] %vec.phi = phi <32 x i32> [ %11, %vector.body ], [ zeroinitializer, %entry ] - %4 = getelementptr inbounds i8, i8* %0, i64 %index - %5 = bitcast i8* %4 to <32 x i8>* - %wide.load = load <32 x i8>, <32 x i8>* %5, align 1 + %4 = getelementptr inbounds i8, ptr %0, i64 %index + %5 = bitcast ptr %4 to ptr + %wide.load = load <32 x i8>, ptr %5, align 1 %6 = sext <32 x i8> %wide.load to <32 x i32> - %7 = getelementptr inbounds i8, i8* %1, i64 %index - %8 = bitcast i8* %7 to <32 x i8>* - %wide.load14 = load <32 x i8>, <32 x i8>* %8, align 1 + %7 = getelementptr inbounds i8, ptr %1, i64 %index + %8 = bitcast ptr %7 to ptr + %wide.load14 = load <32 x i8>, ptr %8, align 1 %9 = sext <32 x i8> %wide.load14 to <32 x i32> %10 = mul nsw <32 x i32> %9, %6 %11 = add nsw <32 x i32> %10, %vec.phi @@ -522,13 +520,13 @@ entry: vector.body: %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] %vec.phi = phi <16 x i32> [ zeroinitializer, %entry ], [ %10, %vector.body ] - %0 = getelementptr inbounds [1024 x i8], [1024 x i8]* @a, i64 0, i64 %index - %1 = bitcast i8* %0 to <16 x i8>* - %wide.load = load <16 x i8>, <16 x i8>* %1, align 4 + %0 = getelementptr inbounds [1024 x i8], ptr @a, i64 0, i64 %index + %1 = bitcast ptr %0 to ptr + %wide.load = load <16 x i8>, ptr %1, align 4 %2 = zext <16 x i8> %wide.load to <16 x i32> - %3 = getelementptr inbounds [1024 x i8], [1024 x i8]* @b, i64 0, i64 %index - %4 = bitcast i8* %3 to <16 x i8>* - %wide.load1 = load <16 x i8>, <16 x i8>* %4, align 4 + %3 = getelementptr inbounds [1024 x i8], ptr @b, i64 0, i64 %index + %4 = bitcast ptr %3 to ptr + %wide.load1 = load <16 x i8>, ptr %4, align 4 %5 = zext <16 x i8> %wide.load1 to <16 x i32> %6 = sub nsw <16 x i32> %2, %5 %7 = icmp sgt <16 x i32> %6, @@ -633,13 +631,13 @@ entry: vector.body: %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] %vec.phi = phi <16 x i32> [ zeroinitializer, %entry 
], [ %10, %vector.body ] - %0 = getelementptr inbounds [1024 x i8], [1024 x i8]* @a, i64 0, i64 %index - %1 = bitcast i8* %0 to <16 x i8>* - %wide.load = load <16 x i8>, <16 x i8>* %1, align 4 + %0 = getelementptr inbounds [1024 x i8], ptr @a, i64 0, i64 %index + %1 = bitcast ptr %0 to ptr + %wide.load = load <16 x i8>, ptr %1, align 4 %2 = zext <16 x i8> %wide.load to <16 x i32> - %3 = getelementptr inbounds [1024 x i8], [1024 x i8]* @b, i64 0, i64 %index - %4 = bitcast i8* %3 to <16 x i8>* - %wide.load1 = load <16 x i8>, <16 x i8>* %4, align 4 + %3 = getelementptr inbounds [1024 x i8], ptr @b, i64 0, i64 %index + %4 = bitcast ptr %3 to ptr + %wide.load1 = load <16 x i8>, ptr %4, align 4 %5 = zext <16 x i8> %wide.load1 to <16 x i32> %6 = sub nsw <16 x i32> %2, %5 %7 = icmp sgt <16 x i32> %6, @@ -663,7 +661,7 @@ middle.block: ret i32 %12 } -define dso_local void @sbto16f32_256(<16 x i16> %a, <16 x float>* %res) "min-legal-vector-width"="256" { +define dso_local void @sbto16f32_256(<16 x i16> %a, ptr %res) "min-legal-vector-width"="256" { ; CHECK-LABEL: sbto16f32_256: ; CHECK: # %bb.0: ; CHECK-NEXT: vpmovw2m %ymm0, %k0 @@ -678,11 +676,11 @@ define dso_local void @sbto16f32_256(<16 x i16> %a, <16 x float>* %res) "min-leg ; CHECK-NEXT: retq %mask = icmp slt <16 x i16> %a, zeroinitializer %1 = sitofp <16 x i1> %mask to <16 x float> - store <16 x float> %1, <16 x float>* %res + store <16 x float> %1, ptr %res ret void } -define dso_local void @sbto16f32_512(<16 x i16> %a, <16 x float>* %res) "min-legal-vector-width"="512" { +define dso_local void @sbto16f32_512(<16 x i16> %a, ptr %res) "min-legal-vector-width"="512" { ; CHECK-LABEL: sbto16f32_512: ; CHECK: # %bb.0: ; CHECK-NEXT: vpmovw2m %ymm0, %k0 @@ -693,11 +691,11 @@ define dso_local void @sbto16f32_512(<16 x i16> %a, <16 x float>* %res) "min-leg ; CHECK-NEXT: retq %mask = icmp slt <16 x i16> %a, zeroinitializer %1 = sitofp <16 x i1> %mask to <16 x float> - store <16 x float> %1, <16 x float>* %res + store <16 x float> %1, ptr %res ret void } -define dso_local void @sbto16f64_256(<16 x i16> %a, <16 x double>* %res) "min-legal-vector-width"="256" { +define dso_local void @sbto16f64_256(<16 x i16> %a, ptr %res) "min-legal-vector-width"="256" { ; CHECK-LABEL: sbto16f64_256: ; CHECK: # %bb.0: ; CHECK-NEXT: vpmovw2m %ymm0, %k0 @@ -718,11 +716,11 @@ define dso_local void @sbto16f64_256(<16 x i16> %a, <16 x double>* %res) "min-l ; CHECK-NEXT: retq %mask = icmp slt <16 x i16> %a, zeroinitializer %1 = sitofp <16 x i1> %mask to <16 x double> - store <16 x double> %1, <16 x double>* %res + store <16 x double> %1, ptr %res ret void } -define dso_local void @sbto16f64_512(<16 x i16> %a, <16 x double>* %res) "min-legal-vector-width"="512" { +define dso_local void @sbto16f64_512(<16 x i16> %a, ptr %res) "min-legal-vector-width"="512" { ; CHECK-LABEL: sbto16f64_512: ; CHECK: # %bb.0: ; CHECK-NEXT: vpmovw2m %ymm0, %k0 @@ -736,11 +734,11 @@ define dso_local void @sbto16f64_512(<16 x i16> %a, <16 x double>* %res) "min-l ; CHECK-NEXT: retq %mask = icmp slt <16 x i16> %a, zeroinitializer %1 = sitofp <16 x i1> %mask to <16 x double> - store <16 x double> %1, <16 x double>* %res + store <16 x double> %1, ptr %res ret void } -define dso_local void @ubto16f32_256(<16 x i16> %a, <16 x float>* %res) "min-legal-vector-width"="256" { +define dso_local void @ubto16f32_256(<16 x i16> %a, ptr %res) "min-legal-vector-width"="256" { ; CHECK-LABEL: ubto16f32_256: ; CHECK: # %bb.0: ; CHECK-NEXT: vpmovw2m %ymm0, %k0 @@ -757,11 +755,11 @@ define dso_local void @ubto16f32_256(<16 
x i16> %a, <16 x float>* %res) "min-leg ; CHECK-NEXT: retq %mask = icmp slt <16 x i16> %a, zeroinitializer %1 = uitofp <16 x i1> %mask to <16 x float> - store <16 x float> %1, <16 x float>* %res + store <16 x float> %1, ptr %res ret void } -define dso_local void @ubto16f32_512(<16 x i16> %a, <16 x float>* %res) "min-legal-vector-width"="512" { +define dso_local void @ubto16f32_512(<16 x i16> %a, ptr %res) "min-legal-vector-width"="512" { ; CHECK-LABEL: ubto16f32_512: ; CHECK: # %bb.0: ; CHECK-NEXT: vpmovw2m %ymm0, %k0 @@ -773,11 +771,11 @@ define dso_local void @ubto16f32_512(<16 x i16> %a, <16 x float>* %res) "min-leg ; CHECK-NEXT: retq %mask = icmp slt <16 x i16> %a, zeroinitializer %1 = uitofp <16 x i1> %mask to <16 x float> - store <16 x float> %1, <16 x float>* %res + store <16 x float> %1, ptr %res ret void } -define dso_local void @ubto16f64_256(<16 x i16> %a, <16 x double>* %res) "min-legal-vector-width"="256" { +define dso_local void @ubto16f64_256(<16 x i16> %a, ptr %res) "min-legal-vector-width"="256" { ; CHECK-LABEL: ubto16f64_256: ; CHECK: # %bb.0: ; CHECK-NEXT: vpmovw2m %ymm0, %k0 @@ -800,11 +798,11 @@ define dso_local void @ubto16f64_256(<16 x i16> %a, <16 x double>* %res) "min-le ; CHECK-NEXT: retq %mask = icmp slt <16 x i16> %a, zeroinitializer %1 = uitofp <16 x i1> %mask to <16 x double> - store <16 x double> %1, <16 x double>* %res + store <16 x double> %1, ptr %res ret void } -define dso_local void @ubto16f64_512(<16 x i16> %a, <16 x double>* %res) "min-legal-vector-width"="512" { +define dso_local void @ubto16f64_512(<16 x i16> %a, ptr %res) "min-legal-vector-width"="512" { ; CHECK-LABEL: ubto16f64_512: ; CHECK: # %bb.0: ; CHECK-NEXT: vpmovw2m %ymm0, %k0 @@ -819,11 +817,11 @@ define dso_local void @ubto16f64_512(<16 x i16> %a, <16 x double>* %res) "min-le ; CHECK-NEXT: retq %mask = icmp slt <16 x i16> %a, zeroinitializer %1 = uitofp <16 x i1> %mask to <16 x double> - store <16 x double> %1, <16 x double>* %res + store <16 x double> %1, ptr %res ret void } -define <16 x i16> @test_16f32toub_256(<16 x float>* %ptr, <16 x i16> %passthru) "min-legal-vector-width"="256" { +define <16 x i16> @test_16f32toub_256(ptr %ptr, <16 x i16> %passthru) "min-legal-vector-width"="256" { ; CHECK-LABEL: test_16f32toub_256: ; CHECK: # %bb.0: ; CHECK-NEXT: vcvttps2dq (%rdi), %ymm1 @@ -835,13 +833,13 @@ define <16 x i16> @test_16f32toub_256(<16 x float>* %ptr, <16 x i16> %passthru) ; CHECK-NEXT: kunpckbw %k0, %k1, %k1 ; CHECK-NEXT: vmovdqu16 %ymm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq - %a = load <16 x float>, <16 x float>* %ptr + %a = load <16 x float>, ptr %ptr %mask = fptoui <16 x float> %a to <16 x i1> %select = select <16 x i1> %mask, <16 x i16> %passthru, <16 x i16> zeroinitializer ret <16 x i16> %select } -define <16 x i16> @test_16f32toub_512(<16 x float>* %ptr, <16 x i16> %passthru) "min-legal-vector-width"="512" { +define <16 x i16> @test_16f32toub_512(ptr %ptr, <16 x i16> %passthru) "min-legal-vector-width"="512" { ; CHECK-LABEL: test_16f32toub_512: ; CHECK: # %bb.0: ; CHECK-NEXT: vcvttps2dq (%rdi), %zmm1 @@ -849,13 +847,13 @@ define <16 x i16> @test_16f32toub_512(<16 x float>* %ptr, <16 x i16> %passthru) ; CHECK-NEXT: vpmovd2m %zmm1, %k1 ; CHECK-NEXT: vmovdqu16 %ymm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq - %a = load <16 x float>, <16 x float>* %ptr + %a = load <16 x float>, ptr %ptr %mask = fptoui <16 x float> %a to <16 x i1> %select = select <16 x i1> %mask, <16 x i16> %passthru, <16 x i16> zeroinitializer ret <16 x i16> %select } -define <16 x i16> @test_16f32tosb_256(<16 x float>* 
%ptr, <16 x i16> %passthru) "min-legal-vector-width"="256" { +define <16 x i16> @test_16f32tosb_256(ptr %ptr, <16 x i16> %passthru) "min-legal-vector-width"="256" { ; CHECK-LABEL: test_16f32tosb_256: ; CHECK: # %bb.0: ; CHECK-NEXT: vcvttps2dq (%rdi), %ymm1 @@ -865,26 +863,26 @@ define <16 x i16> @test_16f32tosb_256(<16 x float>* %ptr, <16 x i16> %passthru) ; CHECK-NEXT: kunpckbw %k0, %k1, %k1 ; CHECK-NEXT: vmovdqu16 %ymm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq - %a = load <16 x float>, <16 x float>* %ptr + %a = load <16 x float>, ptr %ptr %mask = fptosi <16 x float> %a to <16 x i1> %select = select <16 x i1> %mask, <16 x i16> %passthru, <16 x i16> zeroinitializer ret <16 x i16> %select } -define <16 x i16> @test_16f32tosb_512(<16 x float>* %ptr, <16 x i16> %passthru) "min-legal-vector-width"="512" { +define <16 x i16> @test_16f32tosb_512(ptr %ptr, <16 x i16> %passthru) "min-legal-vector-width"="512" { ; CHECK-LABEL: test_16f32tosb_512: ; CHECK: # %bb.0: ; CHECK-NEXT: vcvttps2dq (%rdi), %zmm1 ; CHECK-NEXT: vpmovd2m %zmm1, %k1 ; CHECK-NEXT: vmovdqu16 %ymm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq - %a = load <16 x float>, <16 x float>* %ptr + %a = load <16 x float>, ptr %ptr %mask = fptosi <16 x float> %a to <16 x i1> %select = select <16 x i1> %mask, <16 x i16> %passthru, <16 x i16> zeroinitializer ret <16 x i16> %select } -define dso_local void @mul256(<64 x i8>* %a, <64 x i8>* %b, <64 x i8>* %c) "min-legal-vector-width"="256" { +define dso_local void @mul256(ptr %a, ptr %b, ptr %c) "min-legal-vector-width"="256" { ; CHECK-SKX-VBMI-LABEL: mul256: ; CHECK-SKX-VBMI: # %bb.0: ; CHECK-SKX-VBMI-NEXT: vmovdqa (%rdi), %ymm0 @@ -966,14 +964,14 @@ define dso_local void @mul256(<64 x i8>* %a, <64 x i8>* %b, <64 x i8>* %c) "min- ; CHECK-VBMI-NEXT: vmovdqa %ymm1, 32(%rdx) ; CHECK-VBMI-NEXT: vzeroupper ; CHECK-VBMI-NEXT: retq - %d = load <64 x i8>, <64 x i8>* %a - %e = load <64 x i8>, <64 x i8>* %b + %d = load <64 x i8>, ptr %a + %e = load <64 x i8>, ptr %b %f = mul <64 x i8> %d, %e - store <64 x i8> %f, <64 x i8>* %c + store <64 x i8> %f, ptr %c ret void } -define dso_local void @mul512(<64 x i8>* %a, <64 x i8>* %b, <64 x i8>* %c) "min-legal-vector-width"="512" { +define dso_local void @mul512(ptr %a, ptr %b, ptr %c) "min-legal-vector-width"="512" { ; CHECK-SKX-VBMI-LABEL: mul512: ; CHECK-SKX-VBMI: # %bb.0: ; CHECK-SKX-VBMI-NEXT: vmovdqa64 (%rdi), %zmm0 @@ -1023,27 +1021,27 @@ define dso_local void @mul512(<64 x i8>* %a, <64 x i8>* %b, <64 x i8>* %c) "min- ; CHECK-VBMI-NEXT: vmovdqa64 %zmm1, (%rdx) ; CHECK-VBMI-NEXT: vzeroupper ; CHECK-VBMI-NEXT: retq - %d = load <64 x i8>, <64 x i8>* %a - %e = load <64 x i8>, <64 x i8>* %b + %d = load <64 x i8>, ptr %a + %e = load <64 x i8>, ptr %b %f = mul <64 x i8> %d, %e - store <64 x i8> %f, <64 x i8>* %c + store <64 x i8> %f, ptr %c ret void } ; This threw an assertion at one point. 
-define <4 x i32> @mload_v4i32(<4 x i32> %trigger, <4 x i32>* %addr, <4 x i32> %dst) "min-legal-vector-width"="256" { +define <4 x i32> @mload_v4i32(<4 x i32> %trigger, ptr %addr, <4 x i32> %dst) "min-legal-vector-width"="256" { ; CHECK-LABEL: mload_v4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vptestnmd %xmm0, %xmm0, %k1 ; CHECK-NEXT: vpblendmd (%rdi), %xmm1, %xmm0 {%k1} ; CHECK-NEXT: retq %mask = icmp eq <4 x i32> %trigger, zeroinitializer - %res = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst) + %res = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst) ret <4 x i32> %res } -declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32, <4 x i1>, <4 x i32>) +declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32, <4 x i1>, <4 x i32>) -define <16 x i32> @trunc_v16i64_v16i32(<16 x i64>* %x) nounwind "min-legal-vector-width"="256" { +define <16 x i32> @trunc_v16i64_v16i32(ptr %x) nounwind "min-legal-vector-width"="256" { ; CHECK-LABEL: trunc_v16i64_v16i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovdqa (%rdi), %ymm0 @@ -1057,12 +1055,12 @@ define <16 x i32> @trunc_v16i64_v16i32(<16 x i64>* %x) nounwind "min-legal-vecto ; CHECK-NEXT: vpmovqd %ymm3, %xmm2 ; CHECK-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 ; CHECK-NEXT: retq - %a = load <16 x i64>, <16 x i64>* %x + %a = load <16 x i64>, ptr %x %b = trunc <16 x i64> %a to <16 x i32> ret <16 x i32> %b } -define <16 x i8> @trunc_v16i64_v16i8(<16 x i64>* %x) nounwind "min-legal-vector-width"="256" { +define <16 x i8> @trunc_v16i64_v16i8(ptr %x) nounwind "min-legal-vector-width"="256" { ; CHECK-LABEL: trunc_v16i64_v16i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovdqa (%rdi), %ymm0 @@ -1078,12 +1076,12 @@ define <16 x i8> @trunc_v16i64_v16i8(<16 x i64>* %x) nounwind "min-legal-vector- ; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq - %a = load <16 x i64>, <16 x i64>* %x + %a = load <16 x i64>, ptr %x %b = trunc <16 x i64> %a to <16 x i8> ret <16 x i8> %b } -define <16 x i8> @trunc_v16i32_v16i8(<16 x i32>* %x) nounwind "min-legal-vector-width"="256" { +define <16 x i8> @trunc_v16i32_v16i8(ptr %x) nounwind "min-legal-vector-width"="256" { ; CHECK-LABEL: trunc_v16i32_v16i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovdqa (%rdi), %ymm0 @@ -1093,12 +1091,12 @@ define <16 x i8> @trunc_v16i32_v16i8(<16 x i32>* %x) nounwind "min-legal-vector- ; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq - %a = load <16 x i32>, <16 x i32>* %x + %a = load <16 x i32>, ptr %x %b = trunc <16 x i32> %a to <16 x i8> ret <16 x i8> %b } -define <8 x i8> @trunc_v8i64_v8i8(<8 x i64>* %x) nounwind "min-legal-vector-width"="256" { +define <8 x i8> @trunc_v8i64_v8i8(ptr %x) nounwind "min-legal-vector-width"="256" { ; CHECK-LABEL: trunc_v8i64_v8i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovdqa (%rdi), %ymm0 @@ -1108,12 +1106,12 @@ define <8 x i8> @trunc_v8i64_v8i8(<8 x i64>* %x) nounwind "min-legal-vector-widt ; CHECK-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq - %a = load <8 x i64>, <8 x i64>* %x + %a = load <8 x i64>, ptr %x %b = trunc <8 x i64> %a to <8 x i8> ret <8 x i8> %b } -define <8 x i16> @trunc_v8i64_v8i16(<8 x i64>* %x) nounwind "min-legal-vector-width"="256" { +define <8 x i16> @trunc_v8i64_v8i16(ptr %x) nounwind "min-legal-vector-width"="256" { ; CHECK-LABEL: trunc_v8i64_v8i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovdqa (%rdi), %ymm0 @@ 
-1123,12 +1121,12 @@ define <8 x i16> @trunc_v8i64_v8i16(<8 x i64>* %x) nounwind "min-legal-vector-wi ; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq - %a = load <8 x i64>, <8 x i64>* %x + %a = load <8 x i64>, ptr %x %b = trunc <8 x i64> %a to <8 x i16> ret <8 x i16> %b } -define <8 x i32> @trunc_v8i64_v8i32_zeroes(<8 x i64>* %x) nounwind "min-legal-vector-width"="256" { +define <8 x i32> @trunc_v8i64_v8i32_zeroes(ptr %x) nounwind "min-legal-vector-width"="256" { ; CHECK-LABEL: trunc_v8i64_v8i32_zeroes: ; CHECK: # %bb.0: ; CHECK-NEXT: vpsrlq $48, 32(%rdi), %ymm0 @@ -1136,26 +1134,26 @@ define <8 x i32> @trunc_v8i64_v8i32_zeroes(<8 x i64>* %x) nounwind "min-legal-ve ; CHECK-NEXT: vpackusdw %ymm0, %ymm1, %ymm0 ; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] ; CHECK-NEXT: retq - %a = load <8 x i64>, <8 x i64>* %x + %a = load <8 x i64>, ptr %x %b = lshr <8 x i64> %a, %c = trunc <8 x i64> %b to <8 x i32> ret <8 x i32> %c } -define <16 x i16> @trunc_v16i32_v16i16_zeroes(<16 x i32>* %x) nounwind "min-legal-vector-width"="256" { +define <16 x i16> @trunc_v16i32_v16i16_zeroes(ptr %x) nounwind "min-legal-vector-width"="256" { ; CHECK-LABEL: trunc_v16i32_v16i16_zeroes: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovdqa (%rdi), %ymm1 ; CHECK-NEXT: vmovdqa {{.*#+}} ymm0 = [1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31] ; CHECK-NEXT: vpermi2w 32(%rdi), %ymm1, %ymm0 ; CHECK-NEXT: retq - %a = load <16 x i32>, <16 x i32>* %x + %a = load <16 x i32>, ptr %x %b = lshr <16 x i32> %a, %c = trunc <16 x i32> %b to <16 x i16> ret <16 x i16> %c } -define <32 x i8> @trunc_v32i16_v32i8_zeroes(<32 x i16>* %x) nounwind "min-legal-vector-width"="256" { +define <32 x i8> @trunc_v32i16_v32i8_zeroes(ptr %x) nounwind "min-legal-vector-width"="256" { ; CHECK-SKX-VBMI-LABEL: trunc_v32i16_v32i8_zeroes: ; CHECK-SKX-VBMI: # %bb.0: ; CHECK-SKX-VBMI-NEXT: vmovdqa (%rdi), %ymm1 @@ -1177,13 +1175,13 @@ define <32 x i8> @trunc_v32i16_v32i8_zeroes(<32 x i16>* %x) nounwind "min-legal- ; CHECK-VBMI-NEXT: vmovdqa {{.*#+}} ymm0 = [1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31,33,35,37,39,41,43,45,47,49,51,53,55,57,59,61,63] ; CHECK-VBMI-NEXT: vpermi2b 32(%rdi), %ymm1, %ymm0 ; CHECK-VBMI-NEXT: retq - %a = load <32 x i16>, <32 x i16>* %x + %a = load <32 x i16>, ptr %x %b = lshr <32 x i16> %a, %c = trunc <32 x i16> %b to <32 x i8> ret <32 x i8> %c } -define <8 x i32> @trunc_v8i64_v8i32_sign(<8 x i64>* %x) nounwind "min-legal-vector-width"="256" { +define <8 x i32> @trunc_v8i64_v8i32_sign(ptr %x) nounwind "min-legal-vector-width"="256" { ; CHECK-LABEL: trunc_v8i64_v8i32_sign: ; CHECK: # %bb.0: ; CHECK-NEXT: vpsraq $48, 32(%rdi), %ymm0 @@ -1191,26 +1189,26 @@ define <8 x i32> @trunc_v8i64_v8i32_sign(<8 x i64>* %x) nounwind "min-legal-vect ; CHECK-NEXT: vpackssdw %ymm0, %ymm1, %ymm0 ; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] ; CHECK-NEXT: retq - %a = load <8 x i64>, <8 x i64>* %x + %a = load <8 x i64>, ptr %x %b = ashr <8 x i64> %a, %c = trunc <8 x i64> %b to <8 x i32> ret <8 x i32> %c } -define <16 x i16> @trunc_v16i32_v16i16_sign(<16 x i32>* %x) nounwind "min-legal-vector-width"="256" { +define <16 x i16> @trunc_v16i32_v16i16_sign(ptr %x) nounwind "min-legal-vector-width"="256" { ; CHECK-LABEL: trunc_v16i32_v16i16_sign: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovdqa (%rdi), %ymm1 ; CHECK-NEXT: vmovdqa {{.*#+}} ymm0 = [1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31] ; CHECK-NEXT: vpermi2w 32(%rdi), %ymm1, %ymm0 ; CHECK-NEXT: retq - %a = load <16 x i32>, <16 x i32>* %x + %a = load <16 x i32>, ptr %x %b = 
ashr <16 x i32> %a, %c = trunc <16 x i32> %b to <16 x i16> ret <16 x i16> %c } -define <32 x i8> @trunc_v32i16_v32i8_sign(<32 x i16>* %x) nounwind "min-legal-vector-width"="256" { +define <32 x i8> @trunc_v32i16_v32i8_sign(ptr %x) nounwind "min-legal-vector-width"="256" { ; CHECK-SKX-VBMI-LABEL: trunc_v32i16_v32i8_sign: ; CHECK-SKX-VBMI: # %bb.0: ; CHECK-SKX-VBMI-NEXT: vmovdqa (%rdi), %ymm1 @@ -1232,13 +1230,13 @@ define <32 x i8> @trunc_v32i16_v32i8_sign(<32 x i16>* %x) nounwind "min-legal-ve ; CHECK-VBMI-NEXT: vmovdqa {{.*#+}} ymm0 = [1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31,33,35,37,39,41,43,45,47,49,51,53,55,57,59,61,63] ; CHECK-VBMI-NEXT: vpermi2b 32(%rdi), %ymm1, %ymm0 ; CHECK-VBMI-NEXT: retq - %a = load <32 x i16>, <32 x i16>* %x + %a = load <32 x i16>, ptr %x %b = ashr <32 x i16> %a, %c = trunc <32 x i16> %b to <32 x i8> ret <32 x i8> %c } -define dso_local void @zext_v16i8_v16i64(<16 x i8> %x, <16 x i64>* %y) nounwind "min-legal-vector-width"="256" { +define dso_local void @zext_v16i8_v16i64(<16 x i8> %x, ptr %y) nounwind "min-legal-vector-width"="256" { ; CHECK-LABEL: zext_v16i8_v16i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero @@ -1256,11 +1254,11 @@ define dso_local void @zext_v16i8_v16i64(<16 x i8> %x, <16 x i64>* %y) nounwind ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq %a = zext <16 x i8> %x to <16 x i64> - store <16 x i64> %a, <16 x i64>* %y + store <16 x i64> %a, ptr %y ret void } -define dso_local void @sext_v16i8_v16i64(<16 x i8> %x, <16 x i64>* %y) nounwind "min-legal-vector-width"="256" { +define dso_local void @sext_v16i8_v16i64(<16 x i8> %x, ptr %y) nounwind "min-legal-vector-width"="256" { ; CHECK-LABEL: sext_v16i8_v16i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vpmovsxbw %xmm0, %ymm1 @@ -1278,11 +1276,11 @@ define dso_local void @sext_v16i8_v16i64(<16 x i8> %x, <16 x i64>* %y) nounwind ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq %a = sext <16 x i8> %x to <16 x i64> - store <16 x i64> %a, <16 x i64>* %y + store <16 x i64> %a, ptr %y ret void } -define dso_local void @vselect_split_v8i16_setcc(<8 x i16> %s, <8 x i16> %t, <8 x i64>* %p, <8 x i64>* %q, <8 x i64>* %r) "min-legal-vector-width"="256" { +define dso_local void @vselect_split_v8i16_setcc(<8 x i16> %s, <8 x i16> %t, ptr %p, ptr %q, ptr %r) "min-legal-vector-width"="256" { ; CHECK-LABEL: vselect_split_v8i16_setcc: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovdqa (%rsi), %ymm2 @@ -1295,15 +1293,15 @@ define dso_local void @vselect_split_v8i16_setcc(<8 x i16> %s, <8 x i16> %t, <8 ; CHECK-NEXT: vmovdqa %ymm3, 32(%rdx) ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq - %x = load <8 x i64>, <8 x i64>* %p - %y = load <8 x i64>, <8 x i64>* %q + %x = load <8 x i64>, ptr %p + %y = load <8 x i64>, ptr %q %a = icmp eq <8 x i16> %s, %t %b = select <8 x i1> %a, <8 x i64> %x, <8 x i64> %y - store <8 x i64> %b, <8 x i64>* %r + store <8 x i64> %b, ptr %r ret void } -define dso_local void @vselect_split_v8i32_setcc(<8 x i32> %s, <8 x i32> %t, <8 x i64>* %p, <8 x i64>* %q, <8 x i64>* %r) "min-legal-vector-width"="256" { +define dso_local void @vselect_split_v8i32_setcc(<8 x i32> %s, <8 x i32> %t, ptr %p, ptr %q, ptr %r) "min-legal-vector-width"="256" { ; CHECK-LABEL: vselect_split_v8i32_setcc: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovdqa (%rsi), %ymm2 @@ -1316,15 +1314,15 @@ define dso_local void @vselect_split_v8i32_setcc(<8 x 
i32> %s, <8 x i32> %t, <8 ; CHECK-NEXT: vmovdqa %ymm3, 32(%rdx) ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq - %x = load <8 x i64>, <8 x i64>* %p - %y = load <8 x i64>, <8 x i64>* %q + %x = load <8 x i64>, ptr %p + %y = load <8 x i64>, ptr %q %a = icmp eq <8 x i32> %s, %t %b = select <8 x i1> %a, <8 x i64> %x, <8 x i64> %y - store <8 x i64> %b, <8 x i64>* %r + store <8 x i64> %b, ptr %r ret void } -define dso_local void @vselect_split_v16i8_setcc(<16 x i8> %s, <16 x i8> %t, <16 x i32>* %p, <16 x i32>* %q, <16 x i32>* %r) "min-legal-vector-width"="256" { +define dso_local void @vselect_split_v16i8_setcc(<16 x i8> %s, <16 x i8> %t, ptr %p, ptr %q, ptr %r) "min-legal-vector-width"="256" { ; CHECK-LABEL: vselect_split_v16i8_setcc: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovdqa (%rsi), %ymm2 @@ -1337,15 +1335,15 @@ define dso_local void @vselect_split_v16i8_setcc(<16 x i8> %s, <16 x i8> %t, <16 ; CHECK-NEXT: vmovdqa %ymm3, 32(%rdx) ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq - %x = load <16 x i32>, <16 x i32>* %p - %y = load <16 x i32>, <16 x i32>* %q + %x = load <16 x i32>, ptr %p + %y = load <16 x i32>, ptr %q %a = icmp eq <16 x i8> %s, %t %b = select <16 x i1> %a, <16 x i32> %x, <16 x i32> %y - store <16 x i32> %b, <16 x i32>* %r + store <16 x i32> %b, ptr %r ret void } -define dso_local void @vselect_split_v16i16_setcc(<16 x i16> %s, <16 x i16> %t, <16 x i32>* %p, <16 x i32>* %q, <16 x i32>* %r) "min-legal-vector-width"="256" { +define dso_local void @vselect_split_v16i16_setcc(<16 x i16> %s, <16 x i16> %t, ptr %p, ptr %q, ptr %r) "min-legal-vector-width"="256" { ; CHECK-LABEL: vselect_split_v16i16_setcc: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovdqa (%rsi), %ymm2 @@ -1358,15 +1356,15 @@ define dso_local void @vselect_split_v16i16_setcc(<16 x i16> %s, <16 x i16> %t, ; CHECK-NEXT: vmovdqa %ymm3, 32(%rdx) ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq - %x = load <16 x i32>, <16 x i32>* %p - %y = load <16 x i32>, <16 x i32>* %q + %x = load <16 x i32>, ptr %p + %y = load <16 x i32>, ptr %q %a = icmp eq <16 x i16> %s, %t %b = select <16 x i1> %a, <16 x i32> %x, <16 x i32> %y - store <16 x i32> %b, <16 x i32>* %r + store <16 x i32> %b, ptr %r ret void } -define <16 x i8> @trunc_packus_v16i32_v16i8(<16 x i32>* %p) "min-legal-vector-width"="256" { +define <16 x i8> @trunc_packus_v16i32_v16i8(ptr %p) "min-legal-vector-width"="256" { ; CHECK-LABEL: trunc_packus_v16i32_v16i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovdqa (%rdi), %ymm0 @@ -1375,7 +1373,7 @@ define <16 x i8> @trunc_packus_v16i32_v16i8(<16 x i32>* %p) "min-legal-vector-wi ; CHECK-NEXT: vpmovuswb %ymm0, %xmm0 ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq - %a = load <16 x i32>, <16 x i32>* %p + %a = load <16 x i32>, ptr %p %b = icmp slt <16 x i32> %a, %c = select <16 x i1> %b, <16 x i32> %a, <16 x i32> %d = icmp sgt <16 x i32> %c, zeroinitializer @@ -1384,7 +1382,7 @@ define <16 x i8> @trunc_packus_v16i32_v16i8(<16 x i32>* %p) "min-legal-vector-wi ret <16 x i8> %f } -define dso_local void @trunc_packus_v16i32_v16i8_store(<16 x i32>* %p, <16 x i8>* %q) "min-legal-vector-width"="256" { +define dso_local void @trunc_packus_v16i32_v16i8_store(ptr %p, ptr %q) "min-legal-vector-width"="256" { ; CHECK-LABEL: trunc_packus_v16i32_v16i8_store: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovdqa (%rdi), %ymm0 @@ -1393,13 +1391,13 @@ define dso_local void @trunc_packus_v16i32_v16i8_store(<16 x i32>* %p, <16 x i8> ; CHECK-NEXT: vpmovuswb %ymm0, (%rsi) ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq - %a = load <16 x i32>, <16 x i32>* %p + %a = load <16 x i32>, ptr %p %b = icmp slt <16 x 
i32> %a, %c = select <16 x i1> %b, <16 x i32> %a, <16 x i32> %d = icmp sgt <16 x i32> %c, zeroinitializer %e = select <16 x i1> %d, <16 x i32> %c, <16 x i32> zeroinitializer %f = trunc <16 x i32> %e to <16 x i8> - store <16 x i8> %f, <16 x i8>* %q + store <16 x i8> %f, ptr %q ret void } @@ -1410,7 +1408,7 @@ define <64 x i1> @v64i1_argument_return(<64 x i1> %x) "min-legal-vector-width"=" ret <64 x i1> %x } -define dso_local void @v64i1_shuffle(<64 x i8>* %x, <64 x i8>* %y) "min-legal-vector-width"="256" { +define dso_local void @v64i1_shuffle(ptr %x, ptr %y) "min-legal-vector-width"="256" { ; CHECK-LABEL: v64i1_shuffle: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmovdqa (%rdi), %ymm1 @@ -1859,13 +1857,13 @@ define dso_local void @v64i1_shuffle(<64 x i8>* %x, <64 x i8>* %y) "min-legal-ve ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq entry: - %a = load <64 x i8>, <64 x i8>* %x + %a = load <64 x i8>, ptr %x %b = icmp eq <64 x i8> %a, zeroinitializer %shuf = shufflevector <64 x i1> %b, <64 x i1> undef, <64 x i32> - call void @llvm.masked.store.v64i8.p0v64i8(<64 x i8> %a, <64 x i8>* %y, i32 1, <64 x i1> %shuf) + call void @llvm.masked.store.v64i8.p0(<64 x i8> %a, ptr %y, i32 1, <64 x i1> %shuf) ret void } -declare void @llvm.masked.store.v64i8.p0v64i8(<64 x i8>, <64 x i8>*, i32, <64 x i1>) +declare void @llvm.masked.store.v64i8.p0(<64 x i8>, ptr, i32, <64 x i1>) @mem64_dst = dso_local global i64 0, align 8 @mem64_src = dso_local global i64 0, align 8 @@ -1879,14 +1877,14 @@ define dso_local i32 @v64i1_inline_asm() "min-legal-vector-width"="256" { ; CHECK-NEXT: movl -{{[0-9]+}}(%rsp), %eax ; CHECK-NEXT: retq %1 = alloca i32, align 4 - %2 = load i64, i64* @mem64_src, align 8 + %2 = load i64, ptr @mem64_src, align 8 %3 = call i64 asm "", "=k,k,~{dirflag},~{fpsr},~{flags}"(i64 %2) - store i64 %3, i64* @mem64_dst, align 8 - %4 = load i32, i32* %1, align 4 + store i64 %3, ptr @mem64_dst, align 8 + %4 = load i32, ptr %1, align 4 ret i32 %4 } -define dso_local void @cmp_v8i64_sext(<8 x i64>* %xptr, <8 x i64>* %yptr, <8 x i64>* %zptr) "min-legal-vector-width"="256" { +define dso_local void @cmp_v8i64_sext(ptr %xptr, ptr %yptr, ptr %zptr) "min-legal-vector-width"="256" { ; CHECK-LABEL: cmp_v8i64_sext: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovdqa (%rsi), %ymm0 @@ -1897,15 +1895,15 @@ define dso_local void @cmp_v8i64_sext(<8 x i64>* %xptr, <8 x i64>* %yptr, <8 x i ; CHECK-NEXT: vmovdqa %ymm1, 32(%rdx) ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq - %x = load <8 x i64>, <8 x i64>* %xptr - %y = load <8 x i64>, <8 x i64>* %yptr + %x = load <8 x i64>, ptr %xptr + %y = load <8 x i64>, ptr %yptr %cmp = icmp slt <8 x i64> %x, %y %ext = sext <8 x i1> %cmp to <8 x i64> - store <8 x i64> %ext, <8 x i64>* %zptr + store <8 x i64> %ext, ptr %zptr ret void } -define dso_local void @cmp_v8i64_zext(<8 x i64>* %xptr, <8 x i64>* %yptr, <8 x i64>* %zptr) "min-legal-vector-width"="256" { +define dso_local void @cmp_v8i64_zext(ptr %xptr, ptr %yptr, ptr %zptr) "min-legal-vector-width"="256" { ; CHECK-LABEL: cmp_v8i64_zext: ; CHECK: # %bb.0: ; CHECK-NEXT: vmovdqa (%rsi), %ymm0 @@ -1918,11 +1916,11 @@ define dso_local void @cmp_v8i64_zext(<8 x i64>* %xptr, <8 x i64>* %yptr, <8 x i ; CHECK-NEXT: vmovdqa %ymm1, 32(%rdx) ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq - %x = load <8 x i64>, <8 x i64>* %xptr - %y = load <8 x i64>, <8 x i64>* %yptr + %x = load <8 x i64>, ptr %xptr + %y = load <8 x i64>, ptr %yptr %cmp = icmp slt <8 x i64> %x, %y %ext = zext <8 x i1> %cmp to <8 x i64> - store <8 x i64> %ext, <8 x i64>* %zptr + store <8 x i64> %ext, 
ptr %zptr ret void } diff --git a/llvm/test/CodeGen/X86/sad.ll b/llvm/test/CodeGen/X86/sad.ll index cbe63a7c7b456..c0cc371fcb75f 100644 --- a/llvm/test/CodeGen/X86/sad.ll +++ b/llvm/test/CodeGen/X86/sad.ll @@ -1,11 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; ifndef INTEL_SYCL_OPAQUEPOINTER_READY ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX512,AVX512F ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefixes=AVX,AVX512,AVX512BW -; end @a = dso_local global [1024 x i8] zeroinitializer, align 16 @b = dso_local global [1024 x i8] zeroinitializer, align 16 @@ -119,13 +117,13 @@ entry: vector.body: %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] %vec.phi = phi <16 x i32> [ zeroinitializer, %entry ], [ %10, %vector.body ] - %0 = getelementptr inbounds [1024 x i8], [1024 x i8]* @a, i64 0, i64 %index - %1 = bitcast i8* %0 to <16 x i8>* - %wide.load = load <16 x i8>, <16 x i8>* %1, align 4 + %0 = getelementptr inbounds [1024 x i8], ptr @a, i64 0, i64 %index + %1 = bitcast ptr %0 to ptr + %wide.load = load <16 x i8>, ptr %1, align 4 %2 = zext <16 x i8> %wide.load to <16 x i32> - %3 = getelementptr inbounds [1024 x i8], [1024 x i8]* @b, i64 0, i64 %index - %4 = bitcast i8* %3 to <16 x i8>* - %wide.load1 = load <16 x i8>, <16 x i8>* %4, align 4 + %3 = getelementptr inbounds [1024 x i8], ptr @b, i64 0, i64 %index + %4 = bitcast ptr %3 to ptr + %wide.load1 = load <16 x i8>, ptr %4, align 4 %5 = zext <16 x i8> %wide.load1 to <16 x i32> %6 = sub nsw <16 x i32> %2, %5 %7 = icmp sgt <16 x i32> %6, @@ -276,13 +274,13 @@ entry: vector.body: %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] %vec.phi = phi <32 x i32> [ zeroinitializer, %entry ], [ %10, %vector.body ] - %0 = getelementptr inbounds [1024 x i8], [1024 x i8]* @a, i64 0, i64 %index - %1 = bitcast i8* %0 to <32 x i8>* - %wide.load = load <32 x i8>, <32 x i8>* %1, align 32 + %0 = getelementptr inbounds [1024 x i8], ptr @a, i64 0, i64 %index + %1 = bitcast ptr %0 to ptr + %wide.load = load <32 x i8>, ptr %1, align 32 %2 = zext <32 x i8> %wide.load to <32 x i32> - %3 = getelementptr inbounds [1024 x i8], [1024 x i8]* @b, i64 0, i64 %index - %4 = bitcast i8* %3 to <32 x i8>* - %wide.load1 = load <32 x i8>, <32 x i8>* %4, align 32 + %3 = getelementptr inbounds [1024 x i8], ptr @b, i64 0, i64 %index + %4 = bitcast ptr %3 to ptr + %wide.load1 = load <32 x i8>, ptr %4, align 32 %5 = zext <32 x i8> %wide.load1 to <32 x i32> %6 = sub nsw <32 x i32> %2, %5 %7 = icmp sgt <32 x i32> %6, @@ -507,13 +505,13 @@ entry: vector.body: %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] %vec.phi = phi <64 x i32> [ zeroinitializer, %entry ], [ %10, %vector.body ] - %0 = getelementptr inbounds [1024 x i8], [1024 x i8]* @a, i64 0, i64 %index - %1 = bitcast i8* %0 to <64 x i8>* - %wide.load = load <64 x i8>, <64 x i8>* %1, align 64 + %0 = getelementptr inbounds [1024 x i8], ptr @a, i64 0, i64 %index + %1 = bitcast ptr %0 to ptr + %wide.load = load <64 x i8>, ptr %1, align 64 %2 = zext <64 x i8> %wide.load to <64 x i32> - %3 = getelementptr inbounds [1024 x i8], [1024 x i8]* @b, i64 0, i64 %index - %4 = 
bitcast i8* %3 to <64 x i8>* - %wide.load1 = load <64 x i8>, <64 x i8>* %4, align 64 + %3 = getelementptr inbounds [1024 x i8], ptr @b, i64 0, i64 %index + %4 = bitcast ptr %3 to ptr + %wide.load1 = load <64 x i8>, ptr %4, align 64 %5 = zext <64 x i8> %wide.load1 to <64 x i32> %6 = sub nsw <64 x i32> %2, %5 %7 = icmp sgt <64 x i32> %6, @@ -591,13 +589,13 @@ entry: vector.body: %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] %vec.phi = phi <2 x i32> [ zeroinitializer, %entry ], [ %10, %vector.body ] - %0 = getelementptr inbounds [1024 x i8], [1024 x i8]* @a, i64 0, i64 %index - %1 = bitcast i8* %0 to <2 x i8>* - %wide.load = load <2 x i8>, <2 x i8>* %1, align 4 + %0 = getelementptr inbounds [1024 x i8], ptr @a, i64 0, i64 %index + %1 = bitcast ptr %0 to ptr + %wide.load = load <2 x i8>, ptr %1, align 4 %2 = zext <2 x i8> %wide.load to <2 x i32> - %3 = getelementptr inbounds [1024 x i8], [1024 x i8]* @b, i64 0, i64 %index - %4 = bitcast i8* %3 to <2 x i8>* - %wide.load1 = load <2 x i8>, <2 x i8>* %4, align 4 + %3 = getelementptr inbounds [1024 x i8], ptr @b, i64 0, i64 %index + %4 = bitcast ptr %3 to ptr + %wide.load1 = load <2 x i8>, ptr %4, align 4 %5 = zext <2 x i8> %wide.load1 to <2 x i32> %6 = sub nsw <2 x i32> %2, %5 %7 = icmp sgt <2 x i32> %6, @@ -663,13 +661,13 @@ entry: vector.body: %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] %vec.phi = phi <4 x i32> [ zeroinitializer, %entry ], [ %10, %vector.body ] - %0 = getelementptr inbounds [1024 x i8], [1024 x i8]* @a, i64 0, i64 %index - %1 = bitcast i8* %0 to <4 x i8>* - %wide.load = load <4 x i8>, <4 x i8>* %1, align 4 + %0 = getelementptr inbounds [1024 x i8], ptr @a, i64 0, i64 %index + %1 = bitcast ptr %0 to ptr + %wide.load = load <4 x i8>, ptr %1, align 4 %2 = zext <4 x i8> %wide.load to <4 x i32> - %3 = getelementptr inbounds [1024 x i8], [1024 x i8]* @b, i64 0, i64 %index - %4 = bitcast i8* %3 to <4 x i8>* - %wide.load1 = load <4 x i8>, <4 x i8>* %4, align 4 + %3 = getelementptr inbounds [1024 x i8], ptr @b, i64 0, i64 %index + %4 = bitcast ptr %3 to ptr + %wide.load1 = load <4 x i8>, ptr %4, align 4 %5 = zext <4 x i8> %wide.load1 to <4 x i32> %6 = sub nsw <4 x i32> %2, %5 %7 = icmp sgt <4 x i32> %6, @@ -690,7 +688,7 @@ middle.block: } -define dso_local i32 @sad_nonloop_4i8(<4 x i8>* nocapture readonly %p, i64, <4 x i8>* nocapture readonly %q) local_unnamed_addr #0 { +define dso_local i32 @sad_nonloop_4i8(ptr nocapture readonly %p, i64, ptr nocapture readonly %q) local_unnamed_addr #0 { ; SSE2-LABEL: sad_nonloop_4i8: ; SSE2: # %bb.0: ; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero @@ -706,9 +704,9 @@ define dso_local i32 @sad_nonloop_4i8(<4 x i8>* nocapture readonly %p, i64, <4 x ; AVX-NEXT: vpsadbw %xmm0, %xmm1, %xmm0 ; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: retq - %v1 = load <4 x i8>, <4 x i8>* %p, align 1 + %v1 = load <4 x i8>, ptr %p, align 1 %z1 = zext <4 x i8> %v1 to <4 x i32> - %v2 = load <4 x i8>, <4 x i8>* %q, align 1 + %v2 = load <4 x i8>, ptr %q, align 1 %z2 = zext <4 x i8> %v2 to <4 x i32> %sub = sub nsw <4 x i32> %z1, %z2 %isneg = icmp sgt <4 x i32> %sub, @@ -722,7 +720,7 @@ define dso_local i32 @sad_nonloop_4i8(<4 x i8>* nocapture readonly %p, i64, <4 x ret i32 %sum } -define dso_local i32 @sad_nonloop_8i8(<8 x i8>* nocapture readonly %p, i64, <8 x i8>* nocapture readonly %q) local_unnamed_addr #0 { +define dso_local i32 @sad_nonloop_8i8(ptr nocapture readonly %p, i64, ptr nocapture readonly %q) local_unnamed_addr #0 { ; SSE2-LABEL: sad_nonloop_8i8: ; SSE2: # %bb.0: ; SSE2-NEXT: 
movq {{.*#+}} xmm0 = mem[0],zero @@ -738,9 +736,9 @@ define dso_local i32 @sad_nonloop_8i8(<8 x i8>* nocapture readonly %p, i64, <8 x ; AVX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: retq - %v1 = load <8 x i8>, <8 x i8>* %p, align 1 + %v1 = load <8 x i8>, ptr %p, align 1 %z1 = zext <8 x i8> %v1 to <8 x i32> - %v2 = load <8 x i8>, <8 x i8>* %q, align 1 + %v2 = load <8 x i8>, ptr %q, align 1 %z2 = zext <8 x i8> %v2 to <8 x i32> %sub = sub nsw <8 x i32> %z1, %z2 %isneg = icmp sgt <8 x i32> %sub, @@ -756,7 +754,7 @@ define dso_local i32 @sad_nonloop_8i8(<8 x i8>* nocapture readonly %p, i64, <8 x ret i32 %sum } -define dso_local i32 @sad_nonloop_16i8(<16 x i8>* nocapture readonly %p, i64, <16 x i8>* nocapture readonly %q) local_unnamed_addr #0 { +define dso_local i32 @sad_nonloop_16i8(ptr nocapture readonly %p, i64, ptr nocapture readonly %q) local_unnamed_addr #0 { ; SSE2-LABEL: sad_nonloop_16i8: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqu (%rdi), %xmm0 @@ -775,9 +773,9 @@ define dso_local i32 @sad_nonloop_16i8(<16 x i8>* nocapture readonly %p, i64, <1 ; AVX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: retq - %v1 = load <16 x i8>, <16 x i8>* %p, align 1 + %v1 = load <16 x i8>, ptr %p, align 1 %z1 = zext <16 x i8> %v1 to <16 x i32> - %v2 = load <16 x i8>, <16 x i8>* %q, align 1 + %v2 = load <16 x i8>, ptr %q, align 1 %z2 = zext <16 x i8> %v2 to <16 x i32> %sub = sub nsw <16 x i32> %z1, %z2 %isneg = icmp sgt <16 x i32> %sub, @@ -795,7 +793,7 @@ define dso_local i32 @sad_nonloop_16i8(<16 x i8>* nocapture readonly %p, i64, <1 ret i32 %sum } -define dso_local i32 @sad_nonloop_32i8(<32 x i8>* nocapture readonly %p, i64, <32 x i8>* nocapture readonly %q) local_unnamed_addr #0 { +define dso_local i32 @sad_nonloop_32i8(ptr nocapture readonly %p, i64, ptr nocapture readonly %q) local_unnamed_addr #0 { ; SSE2-LABEL: sad_nonloop_32i8: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqu (%rdx), %xmm0 @@ -845,9 +843,9 @@ define dso_local i32 @sad_nonloop_32i8(<32 x i8>* nocapture readonly %p, i64, <3 ; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq - %v1 = load <32 x i8>, <32 x i8>* %p, align 1 + %v1 = load <32 x i8>, ptr %p, align 1 %z1 = zext <32 x i8> %v1 to <32 x i32> - %v2 = load <32 x i8>, <32 x i8>* %q, align 1 + %v2 = load <32 x i8>, ptr %q, align 1 %z2 = zext <32 x i8> %v2 to <32 x i32> %sub = sub nsw <32 x i32> %z1, %z2 %isneg = icmp sgt <32 x i32> %sub, @@ -867,7 +865,7 @@ define dso_local i32 @sad_nonloop_32i8(<32 x i8>* nocapture readonly %p, i64, <3 ret i32 %sum } -define dso_local i32 @sad_nonloop_64i8(<64 x i8>* nocapture readonly %p, i64, <64 x i8>* nocapture readonly %q) local_unnamed_addr #0 { +define dso_local i32 @sad_nonloop_64i8(ptr nocapture readonly %p, i64, ptr nocapture readonly %q) local_unnamed_addr #0 { ; SSE2-LABEL: sad_nonloop_64i8: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqu (%rdx), %xmm0 @@ -952,9 +950,9 @@ define dso_local i32 @sad_nonloop_64i8(<64 x i8>* nocapture readonly %p, i64, <6 ; AVX512BW-NEXT: vmovd %xmm0, %eax ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq - %v1 = load <64 x i8>, <64 x i8>* %p, align 1 + %v1 = load <64 x i8>, ptr %p, align 1 %z1 = zext <64 x i8> %v1 to <64 x i32> - %v2 = load <64 x i8>, <64 x i8>* %q, align 1 + %v2 = load <64 x i8>, ptr %q, align 1 %z2 = zext <64 x i8> %v2 to <64 x i32> %sub = sub nsw <64 x i32> %z1, %z2 %isneg = icmp sgt <64 x i32> %sub, @@ -978,7 +976,7 @@ define dso_local i32 @sad_nonloop_64i8(<64 x i8>* nocapture readonly %p, i64, <6 ; This contains an 
unrolled sad loop with a non-zero initial value. ; DAGCombiner reassociation previously rewrote the adds to move the constant vector further down the tree. This resulted in the vector-reduction flag being lost. -define dso_local i32 @sad_unroll_nonzero_initial(<16 x i8>* %arg, <16 x i8>* %arg1, <16 x i8>* %arg2, <16 x i8>* %arg3) { +define dso_local i32 @sad_unroll_nonzero_initial(ptr %arg, ptr %arg1, ptr %arg2, ptr %arg3) { ; SSE2-LABEL: sad_unroll_nonzero_initial: ; SSE2: # %bb.0: # %bb ; SSE2-NEXT: movdqu (%rdi), %xmm0 @@ -1011,8 +1009,8 @@ define dso_local i32 @sad_unroll_nonzero_initial(<16 x i8>* %arg, <16 x i8>* %ar ; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: retq bb: - %tmp = load <16 x i8>, <16 x i8>* %arg, align 1 - %tmp4 = load <16 x i8>, <16 x i8>* %arg1, align 1 + %tmp = load <16 x i8>, ptr %arg, align 1 + %tmp4 = load <16 x i8>, ptr %arg1, align 1 %tmp5 = zext <16 x i8> %tmp to <16 x i32> %tmp6 = zext <16 x i8> %tmp4 to <16 x i32> %tmp7 = sub nsw <16 x i32> %tmp5, %tmp6 @@ -1020,8 +1018,8 @@ bb: %tmp9 = sub nsw <16 x i32> zeroinitializer, %tmp7 %tmp10 = select <16 x i1> %tmp8, <16 x i32> %tmp9, <16 x i32> %tmp7 %tmp11 = add nuw nsw <16 x i32> %tmp10, - %tmp12 = load <16 x i8>, <16 x i8>* %arg2, align 1 - %tmp13 = load <16 x i8>, <16 x i8>* %arg3, align 1 + %tmp12 = load <16 x i8>, ptr %arg2, align 1 + %tmp13 = load <16 x i8>, ptr %arg3, align 1 %tmp14 = zext <16 x i8> %tmp12 to <16 x i32> %tmp15 = zext <16 x i8> %tmp13 to <16 x i32> %tmp16 = sub nsw <16 x i32> %tmp14, %tmp15 @@ -1043,7 +1041,7 @@ bb: ; This test contains two absolute difference patterns joined by an add. The result of that add is then reduced to a single element. ; SelectionDAGBuilder should tag the joining add as a vector reduction. We neeed to recognize that both sides can use psadbw. -define dso_local i32 @sad_double_reduction(<16 x i8>* %arg, <16 x i8>* %arg1, <16 x i8>* %arg2, <16 x i8>* %arg3) { +define dso_local i32 @sad_double_reduction(ptr %arg, ptr %arg1, ptr %arg2, ptr %arg3) { ; SSE2-LABEL: sad_double_reduction: ; SSE2: # %bb.0: # %bb ; SSE2-NEXT: movdqu (%rdi), %xmm0 @@ -1074,16 +1072,16 @@ define dso_local i32 @sad_double_reduction(<16 x i8>* %arg, <16 x i8>* %arg1, <1 ; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: retq bb: - %tmp = load <16 x i8>, <16 x i8>* %arg, align 1 - %tmp4 = load <16 x i8>, <16 x i8>* %arg1, align 1 + %tmp = load <16 x i8>, ptr %arg, align 1 + %tmp4 = load <16 x i8>, ptr %arg1, align 1 %tmp5 = zext <16 x i8> %tmp to <16 x i32> %tmp6 = zext <16 x i8> %tmp4 to <16 x i32> %tmp7 = sub nsw <16 x i32> %tmp5, %tmp6 %tmp8 = icmp slt <16 x i32> %tmp7, zeroinitializer %tmp9 = sub nsw <16 x i32> zeroinitializer, %tmp7 %tmp10 = select <16 x i1> %tmp8, <16 x i32> %tmp9, <16 x i32> %tmp7 - %tmp11 = load <16 x i8>, <16 x i8>* %arg2, align 1 - %tmp12 = load <16 x i8>, <16 x i8>* %arg3, align 1 + %tmp11 = load <16 x i8>, ptr %arg2, align 1 + %tmp12 = load <16 x i8>, ptr %arg3, align 1 %tmp13 = zext <16 x i8> %tmp11 to <16 x i32> %tmp14 = zext <16 x i8> %tmp12 to <16 x i32> %tmp15 = sub nsw <16 x i32> %tmp13, %tmp14 @@ -1105,7 +1103,7 @@ bb: ; This test contains two absolute difference patterns joined by an add. The result of that add is then reduced to a single element. ; SelectionDAGBuilder should tag the joining add as a vector reduction. We neeed to recognize that both sides can use psadbw. 
-define dso_local i32 @sad_double_reduction_abs(<16 x i8>* %arg, <16 x i8>* %arg1, <16 x i8>* %arg2, <16 x i8>* %arg3) { +define dso_local i32 @sad_double_reduction_abs(ptr %arg, ptr %arg1, ptr %arg2, ptr %arg3) { ; SSE2-LABEL: sad_double_reduction_abs: ; SSE2: # %bb.0: # %bb ; SSE2-NEXT: movdqu (%rdi), %xmm0 @@ -1136,14 +1134,14 @@ define dso_local i32 @sad_double_reduction_abs(<16 x i8>* %arg, <16 x i8>* %arg1 ; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: retq bb: - %tmp = load <16 x i8>, <16 x i8>* %arg, align 1 - %tmp4 = load <16 x i8>, <16 x i8>* %arg1, align 1 + %tmp = load <16 x i8>, ptr %arg, align 1 + %tmp4 = load <16 x i8>, ptr %arg1, align 1 %tmp5 = zext <16 x i8> %tmp to <16 x i32> %tmp6 = zext <16 x i8> %tmp4 to <16 x i32> %tmp7 = sub nsw <16 x i32> %tmp5, %tmp6 %tmp10 = call <16 x i32> @llvm.abs.v16i32(<16 x i32> %tmp7, i1 false) - %tmp11 = load <16 x i8>, <16 x i8>* %arg2, align 1 - %tmp12 = load <16 x i8>, <16 x i8>* %arg3, align 1 + %tmp11 = load <16 x i8>, ptr %arg2, align 1 + %tmp12 = load <16 x i8>, ptr %arg3, align 1 %tmp13 = zext <16 x i8> %tmp11 to <16 x i32> %tmp14 = zext <16 x i8> %tmp12 to <16 x i32> %tmp15 = sub nsw <16 x i32> %tmp13, %tmp14 diff --git a/llvm/test/ExecutionEngine/Orc/global-variable-alignment.ll b/llvm/test/ExecutionEngine/Orc/global-variable-alignment.ll index 16fc3ad0547dc..3a4528a025ddf 100644 --- a/llvm/test/ExecutionEngine/Orc/global-variable-alignment.ll +++ b/llvm/test/ExecutionEngine/Orc/global-variable-alignment.ll @@ -7,22 +7,20 @@ ; A failure may indicate a problem with alignment handling in the JIT linker or ; JIT memory manager. ; -; ifndef INTEL_SYCL_OPAQUEPOINTER_READY ; RUN: %lli %s -; endif @A = internal global i8 1, align 1 @B = global i64 1, align 8 @C = internal global i8 1, align 1 -define i32 @main(i32 %argc, i8** %argv) { +define i32 @main(i32 %argc, ptr %argv) { entry: - %0 = ptrtoint i8* @B to i32 + %0 = ptrtoint ptr @B to i32 %1 = and i32 %0, 7 - %2 = load i8, i8* @A + %2 = load i8, ptr @A %3 = zext i8 %2 to i32 %4 = add i32 %1, %3 - %5 = load i8, i8* @C + %5 = load i8, ptr @C %6 = zext i8 %5 to i32 %7 = sub i32 %4, %6 ret i32 %7 diff --git a/llvm/test/Instrumentation/InstrProfiling/timestamp-coverage.ll b/llvm/test/Instrumentation/InstrProfiling/timestamp-coverage.ll index bbffa2a8c8ea6..854c3bc4aeea9 100644 --- a/llvm/test/Instrumentation/InstrProfiling/timestamp-coverage.ll +++ b/llvm/test/Instrumentation/InstrProfiling/timestamp-coverage.ll @@ -1,8 +1,4 @@ -; ifdef INTEL_SYCL_OPAQUEPOINTER_READY -; COM: opt < %s -passes=instrprof -S | FileCheck %s -; else ; RUN: opt < %s -passes=instrprof -S | FileCheck %s -; endif target triple = "aarch64-unknown-linux-gnu" @@ -10,11 +6,11 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK: @__profc_foo = private global [9 x i8] c"\FF\FF\FF\FF\FF\FF\FF\FF\FF", section "__llvm_prf_cnts", comdat, align 8 define void @_Z3foov() { - call void @llvm.instrprof.timestamp(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 12345678, i32 9, i32 0) + call void @llvm.instrprof.timestamp(ptr getelementptr inbounds ([3 x i8], ptr @__profn_foo, i32 0, i32 0), i64 12345678, i32 9, i32 0) ; CHECK: call void @__llvm_profile_set_timestamp(ptr @__profc_foo) - call void @llvm.instrprof.cover(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 12345678, i32 9, i32 8) + call void @llvm.instrprof.cover(ptr getelementptr inbounds ([3 x i8], ptr @__profn_foo, i32 0, i32 0), i64 12345678, i32 9, i32 8) ret void } -declare void 
@llvm.instrprof.timestamp(i8*, i64, i32, i32) -declare void @llvm.instrprof.cover(i8*, i64, i32, i32) +declare void @llvm.instrprof.timestamp(ptr, i64, i32, i32) +declare void @llvm.instrprof.cover(ptr, i64, i32, i32) diff --git a/llvm/test/Instrumentation/InstrProfiling/timestamp.ll b/llvm/test/Instrumentation/InstrProfiling/timestamp.ll index 47f0a1ab34a2e..be4c9236e3785 100644 --- a/llvm/test/Instrumentation/InstrProfiling/timestamp.ll +++ b/llvm/test/Instrumentation/InstrProfiling/timestamp.ll @@ -1,8 +1,4 @@ -; ifdef INTEL_SYCL_OPAQUEPOINTER_READY -; COM: opt < %s -passes=instrprof -S | FileCheck %s -; else ; RUN: opt < %s -passes=instrprof -S | FileCheck %s -; endif target triple = "aarch64-unknown-linux-gnu" @@ -10,11 +6,11 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK: @__profc_foo = private global [2 x i64] zeroinitializer, section "__llvm_prf_cnts", comdat, align 8 define void @_Z3foov() { - call void @llvm.instrprof.timestamp(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 12345678, i32 2, i32 0) + call void @llvm.instrprof.timestamp(ptr getelementptr inbounds ([3 x i8], ptr @__profn_foo, i32 0, i32 0), i64 12345678, i32 2, i32 0) ; CHECK: call void @__llvm_profile_set_timestamp(ptr @__profc_foo) - call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 12345678, i32 2, i32 1) + call void @llvm.instrprof.increment(ptr getelementptr inbounds ([3 x i8], ptr @__profn_foo, i32 0, i32 0), i64 12345678, i32 2, i32 1) ret void } -declare void @llvm.instrprof.timestamp(i8*, i64, i32, i32) -declare void @llvm.instrprof.increment(i8*, i64, i32, i32) +declare void @llvm.instrprof.timestamp(ptr, i64, i32, i32) +declare void @llvm.instrprof.increment(ptr, i64, i32, i32) diff --git a/llvm/test/Transforms/GVNHoist/infinite-loop-indirect.ll b/llvm/test/Transforms/GVNHoist/infinite-loop-indirect.ll index 12e45dceca90f..61fa0e07f3480 100644 --- a/llvm/test/Transforms/GVNHoist/infinite-loop-indirect.ll +++ b/llvm/test/Transforms/GVNHoist/infinite-loop-indirect.ll @@ -1,20 +1,16 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 -; ifdef INTEL_SYCL_OPAQUEPOINTER_READY -; COM: opt -S -passes=gvn-hoist < %s | FileCheck %s -; else ; RUN: opt -S -passes=gvn-hoist < %s | FileCheck %s -; endif ; Checking gvn-hoist in case of indirect branches. 
-%class.bar = type { i8*, %class.base* } +%class.bar = type { ptr, ptr } %class.base = type { i32 (...)** } @bar = local_unnamed_addr global i32 ()* null, align 8 @bar1 = local_unnamed_addr global i32 ()* null, align 8 ; Check that the bitcast is not hoisted because it is after an indirect call -define i32 @foo(i32* nocapture readonly %i) { +define i32 @foo(ptr nocapture readonly %i) { ; CHECK-LABEL: define i32 @foo ; CHECK-SAME: (ptr nocapture readonly [[I:%.*]]) { ; CHECK-NEXT: entry: @@ -40,21 +36,21 @@ define i32 @foo(i32* nocapture readonly %i) { ; entry: %agg.tmp = alloca %class.bar, align 8 - %x= getelementptr inbounds %class.bar, %class.bar* %agg.tmp, i64 0, i32 1 - %y = load %class.base*, %class.base** %x, align 8 - %0 = load i32, i32* %i, align 4 + %x= getelementptr inbounds %class.bar, ptr %agg.tmp, i64 0, i32 1 + %y = load ptr, ptr %x, align 8 + %0 = load i32, ptr %i, align 4 %.off = add i32 %0, -1 %switch = icmp ult i32 %.off, 2 br i1 %switch, label %l1.preheader, label %sw.default l1.preheader: ; preds = %sw.default, %entry - %b1 = bitcast %class.base* %y to void (%class.base*)*** + %b1 = bitcast ptr %y to void (ptr)*** br label %l1 l1: ; preds = %l1.preheader, %l1 %1 = load i32 ()*, i32 ()** @bar, align 8 %call = tail call i32 %1() - %b2 = bitcast %class.base* %y to void (%class.base*)*** + %b2 = bitcast ptr %y to void (ptr)*** br label %l1 sw.default: ; preds = %entry @@ -66,7 +62,7 @@ sw.default: ; preds = %entry ; Any instruction inside an infinite loop will not be hoisted because ; there is no path to exit of the function. -define i32 @foo1(i32* nocapture readonly %i) { +define i32 @foo1(ptr nocapture readonly %i) { ; CHECK-LABEL: define i32 @foo1 ; CHECK-SAME: (ptr nocapture readonly [[I:%.*]]) { ; CHECK-NEXT: entry: @@ -94,22 +90,22 @@ define i32 @foo1(i32* nocapture readonly %i) { ; entry: %agg.tmp = alloca %class.bar, align 8 - %x= getelementptr inbounds %class.bar, %class.bar* %agg.tmp, i64 0, i32 1 - %y = load %class.base*, %class.base** %x, align 8 - %0 = load i32, i32* %i, align 4 + %x= getelementptr inbounds %class.bar, ptr %agg.tmp, i64 0, i32 1 + %y = load ptr, ptr %x, align 8 + %0 = load i32, ptr %i, align 4 %.off = add i32 %0, -1 %switch = icmp ult i32 %.off, 2 br i1 %switch, label %l1.preheader, label %sw.default l1.preheader: ; preds = %sw.default, %entry - %b1 = bitcast %class.base* %y to void (%class.base*)*** - %y1 = load %class.base*, %class.base** %x, align 8 + %b1 = bitcast ptr %y to void (ptr)*** + %y1 = load ptr, ptr %x, align 8 br label %l1 l1: ; preds = %l1.preheader, %l1 - %b2 = bitcast %class.base* %y to void (%class.base*)*** + %b2 = bitcast ptr %y to void (ptr)*** %1 = load i32 ()*, i32 ()** @bar, align 8 - %y2 = load %class.base*, %class.base** %x, align 8 + %y2 = load ptr, ptr %x, align 8 %call = tail call i32 %1() br label %l1 @@ -120,7 +116,7 @@ sw.default: ; preds = %entry } ; Check that bitcast is hoisted even when one of them is partially redundant. 
-define i32 @test13(i32* %P, i8* %Ptr, i32* nocapture readonly %i) { +define i32 @test13(ptr %P, ptr %Ptr, ptr nocapture readonly %i) { ; CHECK-LABEL: define i32 @test13 ; CHECK-SAME: (ptr [[P:%.*]], ptr [[PTR:%.*]], ptr nocapture readonly [[I:%.*]]) { ; CHECK-NEXT: entry: @@ -143,18 +139,18 @@ define i32 @test13(i32* %P, i8* %Ptr, i32* nocapture readonly %i) { ; entry: %agg.tmp = alloca %class.bar, align 8 - %x= getelementptr inbounds %class.bar, %class.bar* %agg.tmp, i64 0, i32 1 - %y = load %class.base*, %class.base** %x, align 8 - indirectbr i8* %Ptr, [label %BrBlock, label %B2] + %x= getelementptr inbounds %class.bar, ptr %agg.tmp, i64 0, i32 1 + %y = load ptr, ptr %x, align 8 + indirectbr ptr %Ptr, [label %BrBlock, label %B2] B2: - %b1 = bitcast %class.base* %y to void (%class.base*)*** - store i32 4, i32 *%P + %b1 = bitcast ptr %y to void (ptr)*** + store i32 4, ptr%P br label %BrBlock BrBlock: - %b2 = bitcast %class.base* %y to void (%class.base*)*** - %L = load i32, i32* %P + %b2 = bitcast ptr %y to void (ptr)*** + %L = load i32, ptr %P %C = icmp eq i32 %L, 42 br i1 %C, label %T, label %F @@ -167,7 +163,7 @@ F: ; Check that the bitcast is not hoisted because anticipability ; cannot be guaranteed here as one of the indirect branch targets ; do not have the bitcast instruction. -define i32 @test14(i32* %P, i8* %Ptr, i32* nocapture readonly %i) { +define i32 @test14(ptr %P, ptr %Ptr, ptr nocapture readonly %i) { ; CHECK-LABEL: define i32 @test14 ; CHECK-SAME: (ptr [[P:%.*]], ptr [[PTR:%.*]], ptr nocapture readonly [[I:%.*]]) { ; CHECK-NEXT: entry: @@ -193,33 +189,33 @@ define i32 @test14(i32* %P, i8* %Ptr, i32* nocapture readonly %i) { ; entry: %agg.tmp = alloca %class.bar, align 8 - %x= getelementptr inbounds %class.bar, %class.bar* %agg.tmp, i64 0, i32 1 - %y = load %class.base*, %class.base** %x, align 8 - indirectbr i8* %Ptr, [label %BrBlock, label %B2, label %T] + %x= getelementptr inbounds %class.bar, ptr %agg.tmp, i64 0, i32 1 + %y = load ptr, ptr %x, align 8 + indirectbr ptr %Ptr, [label %BrBlock, label %B2, label %T] B2: - %b1 = bitcast %class.base* %y to void (%class.base*)*** - store i32 4, i32 *%P + %b1 = bitcast ptr %y to void (ptr)*** + store i32 4, ptr%P br label %BrBlock BrBlock: - %b2 = bitcast %class.base* %y to void (%class.base*)*** - %L = load i32, i32* %P + %b2 = bitcast ptr %y to void (ptr)*** + %L = load i32, ptr %P %C = icmp eq i32 %L, 42 br i1 %C, label %T, label %F T: - %pi = load i32, i32* %i, align 4 + %pi = load i32, ptr %i, align 4 ret i32 %pi F: - %pl = load i32, i32* %P + %pl = load i32, ptr %P ret i32 %pl } ; Check that the bitcast is not hoisted because of a cycle ; due to indirect branches -define i32 @test16(i32* %P, i8* %Ptr, i32* nocapture readonly %i) { +define i32 @test16(ptr %P, ptr %Ptr, ptr nocapture readonly %i) { ; CHECK-LABEL: define i32 @test16 ; CHECK-SAME: (ptr [[P:%.*]], ptr [[PTR:%.*]], ptr nocapture readonly [[I:%.*]]) { ; CHECK-NEXT: entry: @@ -244,31 +240,31 @@ define i32 @test16(i32* %P, i8* %Ptr, i32* nocapture readonly %i) { ; entry: %agg.tmp = alloca %class.bar, align 8 - %x= getelementptr inbounds %class.bar, %class.bar* %agg.tmp, i64 0, i32 1 - %y = load %class.base*, %class.base** %x, align 8 - indirectbr i8* %Ptr, [label %BrBlock, label %B2] + %x= getelementptr inbounds %class.bar, ptr %agg.tmp, i64 0, i32 1 + %y = load ptr, ptr %x, align 8 + indirectbr ptr %Ptr, [label %BrBlock, label %B2] B2: - %b1 = bitcast %class.base* %y to void (%class.base*)*** - %0 = load i32, i32* %i, align 4 - store i32 %0, i32 *%P + %b1 = 
bitcast ptr %y to void (ptr)*** + %0 = load i32, ptr %i, align 4 + store i32 %0, ptr%P br label %BrBlock BrBlock: - %b2 = bitcast %class.base* %y to void (%class.base*)*** - %L = load i32, i32* %P + %b2 = bitcast ptr %y to void (ptr)*** + %L = load i32, ptr %P %C = icmp eq i32 %L, 42 br i1 %C, label %T, label %F T: - indirectbr i32* %P, [label %BrBlock, label %B2] + indirectbr ptr %P, [label %BrBlock, label %B2] F: - indirectbr i8* %Ptr, [label %BrBlock, label %B2] + indirectbr ptr %Ptr, [label %BrBlock, label %B2] } -@_ZTIi = external constant i8* +@_ZTIi = external constant ptr ; Check that an instruction is not hoisted out of landing pad (%lpad4) ; Also within a landing pad no redundancies are removed by gvn-hoist, @@ -276,7 +272,7 @@ F: ; landing pad has direct branches (e.g., %lpad to %catch1, %catch) ; This CFG has a cycle (%lpad -> %catch1 -> %lpad4 -> %lpad) -define i32 @foo2(i32* nocapture readonly %i) local_unnamed_addr personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +define i32 @foo2(ptr nocapture readonly %i) local_unnamed_addr personality ptr bitcast (i32 (...)* @__gxx_personality_v0 to ptr) { ; CHECK-LABEL: define i32 @foo2 ; CHECK-SAME: (ptr nocapture readonly [[I:%.*]]) local_unnamed_addr personality ptr @__gxx_personality_v0 { ; CHECK-NEXT: entry: @@ -326,28 +322,28 @@ define i32 @foo2(i32* nocapture readonly %i) local_unnamed_addr personality i8* ; CHECK-NEXT: ret i32 [[BC2]] ; entry: - %0 = load i32, i32* %i, align 4 + %0 = load i32, ptr %i, align 4 %cmp = icmp eq i32 %0, 0 br i1 %cmp, label %try.cont, label %if.then if.then: - %exception = tail call i8* @__cxa_allocate_exception(i64 4) #2 - %1 = bitcast i8* %exception to i32* - store i32 %0, i32* %1, align 16 - invoke void @__cxa_throw(i8* %exception, i8* bitcast (i8** @_ZTIi to i8*), i8* null) #3 + %exception = tail call ptr @__cxa_allocate_exception(i64 4) #2 + %1 = bitcast ptr %exception to ptr + store i32 %0, ptr %1, align 16 + invoke void @__cxa_throw(ptr %exception, ptr bitcast (ptr @_ZTIi to ptr), ptr null) #3 to label %unreachable unwind label %lpad lpad: - %2 = landingpad { i8*, i32 } - catch i8* bitcast (i8** @_ZTIi to i8*) - catch i8* null + %2 = landingpad { ptr, i32 } + catch ptr bitcast (ptr @_ZTIi to ptr) + catch ptr null %bc1 = add i32 %0, 10 - %3 = extractvalue { i8*, i32 } %2, 0 - %4 = extractvalue { i8*, i32 } %2, 1 - %5 = tail call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*)) #2 + %3 = extractvalue { ptr, i32 } %2, 0 + %4 = extractvalue { ptr, i32 } %2, 1 + %5 = tail call i32 @llvm.eh.typeid.for(ptr bitcast (ptr @_ZTIi to ptr)) #2 %matches = icmp eq i32 %4, %5 %bc7 = add i32 %0, 10 - %6 = tail call i8* @__cxa_begin_catch(i8* %3) #2 + %6 = tail call ptr @__cxa_begin_catch(ptr %3) #2 br i1 %matches, label %catch1, label %catch catch1: @@ -357,17 +353,17 @@ catch1: catch: %bc4 = add i32 %0, 10 - %7 = load i32, i32* %i, align 4 + %7 = load i32, ptr %i, align 4 %add = add nsw i32 %7, 1 tail call void @__cxa_end_catch() br label %try.cont lpad4: - %8 = landingpad { i8*, i32 } + %8 = landingpad { ptr, i32 } cleanup %bc5 = add i32 %0, 10 tail call void @__cxa_end_catch() #2 - invoke void @__cxa_throw(i8* %exception, i8* bitcast (i8** @_ZTIi to i8*), i8* null) #3 + invoke void @__cxa_throw(ptr %exception, ptr bitcast (ptr @_ZTIi to ptr), ptr null) #3 to label %unreachable unwind label %lpad try.cont: @@ -380,16 +376,16 @@ unreachable: ret i32 %bc2 } -declare i8* @__cxa_allocate_exception(i64) local_unnamed_addr +declare ptr @__cxa_allocate_exception(i64) local_unnamed_addr 
-declare void @__cxa_throw(i8*, i8*, i8*) local_unnamed_addr +declare void @__cxa_throw(ptr, ptr, ptr) local_unnamed_addr declare i32 @__gxx_personality_v0(...) ; Function Attrs: nounwind readnone -declare i32 @llvm.eh.typeid.for(i8*) #1 +declare i32 @llvm.eh.typeid.for(ptr) #1 -declare i8* @__cxa_begin_catch(i8*) local_unnamed_addr +declare ptr @__cxa_begin_catch(ptr) local_unnamed_addr declare void @__cxa_end_catch() local_unnamed_addr diff --git a/llvm/test/Transforms/GlobalOpt/issue62384.ll b/llvm/test/Transforms/GlobalOpt/issue62384.ll index 65dbfa6a98665..cc2bc8940b891 100644 --- a/llvm/test/Transforms/GlobalOpt/issue62384.ll +++ b/llvm/test/Transforms/GlobalOpt/issue62384.ll @@ -1,9 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 -; if INTEL_SYCL_OPAQUEPOINTER_READY ; RUN: opt -S -p=globalopt %s | FileCheck %s -; else -// XFAIL: * -; end @llvm.used = appending global [1 x ptr] [ptr @ctor] diff --git a/llvm/test/Transforms/InstCombine/scalable-vector-struct.ll b/llvm/test/Transforms/InstCombine/scalable-vector-struct.ll index f359e89578555..c9966be72fb51 100644 --- a/llvm/test/Transforms/InstCombine/scalable-vector-struct.ll +++ b/llvm/test/Transforms/InstCombine/scalable-vector-struct.ll @@ -1,23 +1,21 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 -; ifndef INTEL_SYCL_OPAQUEPOINTER_READY ; RUN: opt -passes=instcombine -S < %s 2>&1 | FileCheck %s -; endif %struct.test = type { , } -define @load(%struct.test* %x) { +define @load(ptr %x) { ; CHECK-LABEL: define @load ; CHECK-SAME: (ptr [[X:%.*]]) { ; CHECK-NEXT: [[A:%.*]] = load [[STRUCT_TEST:%.*]], ptr [[X]], align 4 ; CHECK-NEXT: [[B:%.*]] = extractvalue [[STRUCT_TEST]] [[A]], 1 ; CHECK-NEXT: ret [[B]] ; - %a = load %struct.test, %struct.test* %x + %a = load %struct.test, ptr %x %b = extractvalue %struct.test %a, 1 ret %b } -define void @store(%struct.test* %x, %y, %z) { +define void @store(ptr %x, %y, %z) { ; CHECK-LABEL: define void @store ; CHECK-SAME: (ptr [[X:%.*]], [[Y:%.*]], [[Z:%.*]]) { ; CHECK-NEXT: [[A:%.*]] = insertvalue [[STRUCT_TEST:%.*]] undef, [[Y]], 0 @@ -27,6 +25,6 @@ define void @store(%struct.test* %x, %y, % ; %a = insertvalue %struct.test undef, %y, 0 %b = insertvalue %struct.test %a, %z, 1 - store %struct.test %b, %struct.test* %x + store %struct.test %b, ptr %x ret void } diff --git a/llvm/test/Transforms/MoveAutoInit/clobber.ll b/llvm/test/Transforms/MoveAutoInit/clobber.ll index f1f7f63bf8078..09084b6ddc51b 100644 --- a/llvm/test/Transforms/MoveAutoInit/clobber.ll +++ b/llvm/test/Transforms/MoveAutoInit/clobber.ll @@ -1,11 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; Checks that move-auto-init can move instruction passed unclobbering memory ; instructions. 
-; ifdef INTEL_SYCL_OPAQUEPOINTER_READY -; COM: opt < %s -S -passes='move-auto-init' -verify-memoryssa | FileCheck %s -; else ; RUN: opt < %s -S -passes='move-auto-init' -verify-memoryssa | FileCheck %s -; endif target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" @@ -49,25 +45,25 @@ define i32 @foo(i32 noundef %0, i32 noundef %1, i32 noundef %2) #0 { %4 = alloca [100 x i8], align 16 %5 = alloca [2 x i8], align 1 - %6 = getelementptr inbounds [100 x i8], [100 x i8]* %4, i64 0, i64 0 - call void @llvm.lifetime.start.p0i8(i64 100, i8* nonnull %6) #3 + %6 = getelementptr inbounds [100 x i8], ptr %4, i64 0, i64 0 + call void @llvm.lifetime.start.p0(i64 100, ptr nonnull %6) #3 ; This memset must move. - call void @llvm.memset.p0i8.i64(i8* noundef nonnull align 16 dereferenceable(100) %6, i8 -86, i64 100, i1 false), !annotation !0 - %7 = getelementptr inbounds [2 x i8], [2 x i8]* %5, i64 0, i64 0 - call void @llvm.lifetime.start.p0i8(i64 2, i8* nonnull %7) #3 + call void @llvm.memset.p0.i64(ptr noundef nonnull align 16 dereferenceable(100) %6, i8 -86, i64 100, i1 false), !annotation !0 + %7 = getelementptr inbounds [2 x i8], ptr %5, i64 0, i64 0 + call void @llvm.lifetime.start.p0(i64 2, ptr nonnull %7) #3 ; This store must move. - store i8 -86, i8* %7, align 1, !annotation !0 - %8 = getelementptr inbounds [2 x i8], [2 x i8]* %5, i64 0, i64 1 + store i8 -86, ptr %7, align 1, !annotation !0 + %8 = getelementptr inbounds [2 x i8], ptr %5, i64 0, i64 1 ; This store must move. - store i8 -86, i8* %8, align 1, !annotation !0 + store i8 -86, ptr %8, align 1, !annotation !0 %9 = icmp eq i32 %1, 0 br i1 %9, label %15, label %10 10: %11 = sext i32 %0 to i64 - %12 = getelementptr inbounds [100 x i8], [100 x i8]* %4, i64 0, i64 %11 - store i8 12, i8* %12, align 1 - %13 = load i8, i8* %6, align 16 + %12 = getelementptr inbounds [100 x i8], ptr %4, i64 0, i64 %11 + store i8 12, ptr %12, align 1 + %13 = load i8, ptr %6, align 16 %14 = sext i8 %13 to i32 br label %22 @@ -77,24 +73,24 @@ define i32 @foo(i32 noundef %0, i32 noundef %1, i32 noundef %2) #0 { 17: %18 = sext i32 %0 to i64 - %19 = getelementptr inbounds [2 x i8], [2 x i8]* %5, i64 0, i64 %18 - store i8 12, i8* %19, align 1 - %20 = load i8, i8* %7, align 1 + %19 = getelementptr inbounds [2 x i8], ptr %5, i64 0, i64 %18 + store i8 12, ptr %19, align 1 + %20 = load i8, ptr %7, align 1 %21 = sext i8 %20 to i32 br label %22 22: %23 = phi i32 [ %14, %10 ], [ %21, %17 ], [ 0, %15 ] - call void @llvm.lifetime.end.p0i8(i64 2, i8* nonnull %7) #3 - call void @llvm.lifetime.end.p0i8(i64 100, i8* nonnull %6) #3 + call void @llvm.lifetime.end.p0(i64 2, ptr nonnull %7) #3 + call void @llvm.lifetime.end.p0(i64 100, ptr nonnull %6) #3 ret i32 %23 } -declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1 +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1 -declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) #2 +declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #2 -declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1 +declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1 attributes #0 = { mustprogress nofree nosync nounwind readnone uwtable willreturn } attributes #1 = { argmemonly mustprogress nofree nosync nounwind willreturn } diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86-basic.ll b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86-basic.ll index 
67d0ca602da40..314546afb8853 100644 --- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86-basic.ll +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86-basic.ll @@ -1,5 +1,4 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; ifndef INTEL_SYCL_OPAQUEPOINTER_READY ;; Check that we accept functions with '$' in the name. ; RUN: llc -mtriple=x86_64 < %s | FileCheck %s @@ -8,7 +7,6 @@ ;; Check that we accept .seh_proc below the function label. ; RUN: llc -mtriple=x86_64-windows -relocation-model=pic < %s | FileCheck %s --check-prefix=WIN -; endif @gv0 = dso_local global i32 0, align 4 @gv1 = dso_preemptable global i32 0, align 4 @@ -28,8 +26,8 @@ declare void @ext() define i32 @load() { entry: - %a = load i32, i32* @gv0 - %b = load i32, i32* @gv1 + %a = load i32, ptr @gv0 + %b = load i32, ptr @gv1 %c = add i32 %a, %b ret i32 %c } diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86-basic.ll.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86-basic.ll.expected index ab1e3e6027952..f46d3071b67f1 100644 --- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86-basic.ll.expected +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86-basic.ll.expected @@ -1,5 +1,4 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; ifndef INTEL_SYCL_OPAQUEPOINTER_READY ;; Check that we accept functions with '$' in the name. ; RUN: llc -mtriple=x86_64 < %s | FileCheck %s @@ -8,7 +7,6 @@ ;; Check that we accept .seh_proc below the function label. ; RUN: llc -mtriple=x86_64-windows -relocation-model=pic < %s | FileCheck %s --check-prefix=WIN -; endif @gv0 = dso_local global i32 0, align 4 @gv1 = dso_preemptable global i32 0, align 4 @@ -91,8 +89,8 @@ define i32 @load() { ; WIN-NEXT: addl gv1(%rip), %eax ; WIN-NEXT: retq entry: - %a = load i32, i32* @gv0 - %b = load i32, i32* @gv1 + %a = load i32, ptr @gv0 + %b = load i32, ptr @gv1 %c = add i32 %a, %b ret i32 %c }