Skip to content

Commit c862a83

Browse files
committed
1. Changed CMakeLists.txt to require the user to specify
the device target compute capability and use this value as the default. This can still be overridden with the flag --gpu-compute-capability. 2. Added semiring tests for the GPU target. 3. Changed GPU tests to run only when COMET is compiled with ENABLE_GPU_TARGET=ON.
1 parent ae8245d commit c862a83

29 files changed

+670
-25
lines changed

CMakeLists.txt

+5
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,11 @@ option(ENABLE_GPU_TARGET OFF)
125125
if(${ENABLE_GPU_TARGET})
126126
set(TRITON_PATH "" CACHE PATH "Path to Triton")
127127
set(TRITON_BUILD_PATH "${CMAKE_BINARY_DIR}/triton" CACHE INTERNAL "Path to Triton Build")
128+
if(NOT DEFINED CUDA_COMPUTE_CAPABILITY)
129+
message(FATAL_ERROR "Please specify cuda compute capability requested")
130+
endif()
131+
add_compile_definitions(CUDA_COMPUTE_CAPABILITY=${CUDA_COMPUTE_CAPABILITY})
132+
128133
add_subdirectory(${TRITON_PATH} ${TRITON_BUILD_PATH})
129134
get_property(triton_libs GLOBAL PROPERTY TRITON_LIBS)
130135
include_directories("${TRITON_PATH}")

frontends/comet_dsl/comet.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -180,7 +180,7 @@ static cl::opt<TargetDevice> CodegenTarget("target", cl::init(CPU), cl::desc("Co
180180
static cl::opt<int> GPUBlockSizeX("gpu-block-x-size", cl::init(32), cl::desc("GPU Block size in X direction"));
181181
static cl::opt<int> GPUBlockSizeY("gpu-block-y-size", cl::init(8), cl::desc("GPU Block size in Y direction"));
182182
static cl::opt<int> GPUBlockSizeR("gpu-block-r-size", cl::init(32), cl::desc("GPU Block size in R direction"));
183-
static cl::opt<int> GPUComputeCapability("gpu-compute-capability", cl::init(80), cl::desc("GPU compute capability"));
183+
static cl::opt<int> GPUComputeCapability("gpu-compute-capability", cl::init(CUDA_COMPUTE_CAPABILITY), cl::desc("GPU compute capability"));
184184
static cl::opt<int> GPUNumWarps("gpu-num-warps", cl::init(4), cl::desc("GPU number of warps"));
185185
static cl::opt<int> GPUThreadsPerWarp("gpu-threads-per-warp", cl::init(32), cl::desc("GPU threads per warp"));
186186
static cl::opt<int> GPUNumCTAs("gpu-num-ctas", cl::init(1), cl::desc("GPU num CTAs"));

frontends/numpy-scipy/cometpy/MLIRGen/lowering.py

+3
Original file line numberDiff line numberDiff line change
@@ -716,6 +716,9 @@ def lower_dialect_with_jit(ta_dialect_rep, target: str, out_dims, compile_with_f
716716
if target.startswith("sm_") or target.startswith("compute_") or target.startswith("lto_"):
717717
scf_lower_flags += " " + " --convert-to-triton --target=GPU --gpu-compute-capability="+target.split("_")[1]
718718
mlir_lower_flags += " " + "--target=GPU"
719+
elif target == "gpu":
720+
scf_lower_flags += " " + " --convert-to-triton --target=GPU"
721+
mlir_lower_flags += " " + "--target=GPU"
719722
else :
720723
raise "Expected target formats:\
721724
cpu, compute_<version>, sm_<version>, lto_<version>"

frontends/numpy-scipy/integration_tests/ops/gpu/test_eltwise_add_dense_matrix.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ def run_numpy(A,B):
88

99
return C
1010

11-
@comet.compile(flags=None, target="sm_70")
11+
@comet.compile(flags=None, target="gpu")
1212
def run_comet_with_jit(A,B):
1313
C = A+B
1414

frontends/numpy-scipy/integration_tests/ops/gpu/test_eltwise_mult_DensexDense_oDense.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ def run_numpy(A,B):
88

99
return C
1010

11-
@comet.compile(flags=None, target="sm_70")
11+
@comet.compile(flags=None, target="gpu")
1212
def run_comet_with_jit(A,B):
1313
C = A * B
1414

frontends/numpy-scipy/integration_tests/ops/gpu/test_eltwise_subtract_dense_matrix.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ def run_numpy(A,B):
88

99
return C
1010

11-
@comet.compile(flags=None, target="sm_70")
11+
@comet.compile(flags=None, target="gpu")
1212
def run_comet_with_jit(A,B):
1313
C = A - B
1414

frontends/numpy-scipy/integration_tests/ops/gpu/test_mult_dense_ij-ikj-kj.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ def run_numpy(A,B):
88

99
return C
1010

11-
@comet.compile(flags=None, target="sm_70")
11+
@comet.compile(flags=None, target="gpu")
1212
def run_comet_with_jit(A,B):
1313
C = comet.einsum('ikj,kj->ij', A,B)
1414

frontends/numpy-scipy/integration_tests/ops/gpu/test_mult_dense_matrix.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ def run_numpy(A,B):
77

88
return C
99

10-
@comet.compile(flags=None, target="sm_70")
10+
@comet.compile(flags=None, target="gpu")
1111
def run_comet_with_jit(A,B):
1212
C = comet.einsum('ij,jk->ik', A,B)
1313

frontends/numpy-scipy/integration_tests/ops/gpu/test_mult_dense_matrix_vector.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ def run_numpy(A,B):
88

99
return C
1010

11-
@comet.compile(flags=None, target="sm_70")
11+
@comet.compile(flags=None, target="gpu")
1212
def run_comet_with_jit(A,B):
1313
C = comet.einsum('ij,j->i', A,B)
1414

frontends/numpy-scipy/integration_tests/ops/gpu/test_sum_dense_matrix.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ def run_numpy(A):
88

99
return var
1010

11-
@comet.compile(flags=None, target="sm_70")
11+
@comet.compile(flags=None, target="gpu")
1212
def run_comet_with_jit(A):
1313
var = A.sum()
1414

frontends/numpy-scipy/integration_tests/ops/gpu/test_transpose_dense_matrix.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ def run_numpy(A):
88

99
return B
1010

11-
@comet.compile(flags=None, target="sm_70")
11+
@comet.compile(flags=None, target="gpu")
1212
def run_comet_with_jit(A):
1313
B = A.transpose()
1414

integration_test/CMakeLists.txt

+8
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,14 @@ message(STATUS "Using COMET_UTILITY_LIBRARIES: ${COMET_UTILITY_LIBRARY_DIR}")
1919
set(COMET_INTEGRATION_TEST_DATA_DIR ${CMAKE_CURRENT_SOURCE_DIR}/data/)
2020
message(STATUS "Using COMET_INTEGRATION_TEST_DATA_DIR: ${COMET_INTEGRATION_TEST_DATA_DIR}")
2121

22+
if(ENABLE_GPU_TARGET)
23+
set(COMET_ENABLE_GPU True)
24+
message(STATUS "Using COMET_ENABLE_GPU: ${COMET_ENABLE_GPU}")
25+
else()
26+
set(COMET_ENABLE_GPU False)
27+
message(STATUS "Using COMET_ENABLE_GPU: ${COMET_ENABLE_GPU}")
28+
endif()
29+
2230
configure_lit_site_cfg(
2331
${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.py.in
2432
${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg.py
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# RUN: comet-opt --convert-ta-to-it --convert-to-loops --convert-to-triton --convert-to-llvm %s &> Dense_chain_mult_matrix.llvm
2+
# RUN: mlir-cpu-runner Dense_chain_mult_matrix.llvm -O3 -e main -entry-point-result=void -shared-libs=%comet_utility_library_dir/libcomet_runner_utils%shlibext | FileCheck %s
3+
4+
5+
def main() {
6+
#IndexLabel Declarations
7+
IndexLabel [i] = [2];
8+
IndexLabel [j] = [2];
9+
IndexLabel [k] = [5];
10+
IndexLabel [l] = [2];
11+
12+
#Tensor Declarations
13+
Tensor<double> A([i, j], {Dense});
14+
Tensor<double> B([j, k], {Dense});
15+
Tensor<double> C([k, l], {Dense});
16+
Tensor<double> D([i, l], {Dense});
17+
18+
#Tensor Fill Operation
19+
A[i, j] = 2.2;
20+
B[j, k] = 3.4;
21+
C[k, l] = 1.0;
22+
D[i, l] = 0.0;
23+
24+
D[i, l] = A[i, j] * B[j, k] * C[k,l];
25+
print(D);
26+
}
27+
28+
# Print the result for verification.
29+
# CHECK: data =
30+
# CHECK-NEXT: 74.8,74.8,74.8,74.8,
31+
# REQUIRES: gpu_target_enabled

integration_test/lit.site.cfg.py.in

+3
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,9 @@ config.comet_integration_test_data_dir = "@COMET_INTEGRATION_TEST_DATA_DIR@"
4141
config.comet_shlib_dir = "@LLVM_LIBRARY_OUTPUT_INTDIR@"
4242
config.timeout = "@COMET_INTEGRATION_TIMEOUT@"
4343

44+
if @COMET_ENABLE_GPU@:
45+
config.available_features.add('gpu_target_enabled')
46+
4447
# Support substitution of the tools_dir with user parameters. This is
4548
# used when we can't determine the tool dir at configuration time.
4649
try:

integration_test/ops/gpu/eltwise_add_dense_matrix.ta

+3-2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# RUN: comet-opt --target=GPU --gpu-compute-capability=70 --convert-ta-to-it --convert-to-loops --convert-to-triton --convert-to-llvm %s &> eltwise_add_dense_matrix.llvm
1+
# RUN: comet-opt --target=GPU --convert-ta-to-it --convert-to-loops --convert-to-triton --convert-to-llvm %s &> eltwise_add_dense_matrix.llvm
22
# RUN: mlir-cpu-runner eltwise_add_dense_matrix.llvm -O3 -e main -entry-point-result=void -shared-libs=%comet_utility_library_dir/libcomet_runner_utils%shlibext | FileCheck %s
33

44

@@ -23,4 +23,5 @@ def main() {
2323

2424
# Print the result for verification.
2525
# CHECK: data =
26-
# CHECK-NEXT: 5.6,5.6,5.6,5.6,5.6,5.6,5.6,5.6,5.6,5.6,5.6,5.6,5.6,5.6,5.6,5.6,
26+
# CHECK-NEXT: 5.6,5.6,5.6,5.6,5.6,5.6,5.6,5.6,5.6,5.6,5.6,5.6,5.6,5.6,5.6,5.6,
27+
# REQUIRES: gpu_target_enabled

integration_test/ops/gpu/eltwise_mult_DensexDense_oDense.ta

+3-2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# RUN: comet-opt --target=GPU --gpu-compute-capability=70 --convert-ta-to-it --convert-to-loops --convert-to-triton --convert-to-llvm %s &> eltwise_DensexDense_oDense.llvm
1+
# RUN: comet-opt --target=GPU --convert-ta-to-it --convert-to-loops --convert-to-triton --convert-to-llvm %s &> eltwise_DensexDense_oDense.llvm
22
# RUN: mlir-cpu-runner eltwise_DensexDense_oDense.llvm -O3 -e main -entry-point-result=void -shared-libs=%comet_utility_library_dir/libcomet_runner_utils%shlibext | FileCheck %s
33

44

@@ -27,4 +27,5 @@ def main() {
2727

2828
# Print the result for verification.
2929
# CHECK: data =
30-
# CHECK-NEXT: 8.64,8.64,8.64,8.64,8.64,8.64,8.64,8.64,8.64,8.64,8.64,8.64,8.64,8.64,8.64,8.64,
30+
# CHECK-NEXT: 8.64,8.64,8.64,8.64,8.64,8.64,8.64,8.64,8.64,8.64,8.64,8.64,8.64,8.64,8.64,8.64,
31+
# REQUIRES: gpu_target_enabled

integration_test/ops/gpu/eltwise_subtract_dense_matrix.ta

+3-2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# RUN: comet-opt --target=GPU --gpu-compute-capability=70 --convert-ta-to-it --convert-to-loops --convert-to-triton --convert-to-llvm %s &> eltwise_sub_dense_matrix.llvm
1+
# RUN: comet-opt --target=GPU --convert-ta-to-it --convert-to-loops --convert-to-triton --convert-to-llvm %s &> eltwise_sub_dense_matrix.llvm
22
# RUN: mlir-cpu-runner eltwise_sub_dense_matrix.llvm -O3 -e main -entry-point-result=void -shared-libs=%comet_utility_library_dir/libcomet_runner_utils%shlibext | FileCheck %s
33

44

@@ -23,4 +23,5 @@ def main() {
2323

2424
# Print the result for verification.
2525
# CHECK: data =
26-
# CHECK-NEXT: 1.2,1.2,1.2,1.2,1.2,1.2,1.2,1.2,1.2,1.2,1.2,1.2,1.2,1.2,1.2,1.2,
26+
# CHECK-NEXT: 1.2,1.2,1.2,1.2,1.2,1.2,1.2,1.2,1.2,1.2,1.2,1.2,1.2,1.2,1.2,1.2,
27+
# REQUIRES: gpu_target_enabled

integration_test/ops/gpu/mult_dense_ij-ikj-kj.ta

+3-2
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# This example demonstrates that the compiler can generate code for arbitrary tensor operations
22
# No assumption that contraction indices should disappear in the output tensor.
33

4-
# RUN: comet-opt --target=GPU --gpu-compute-capability=70 --convert-ta-to-it --convert-to-loops --convert-to-triton --convert-to-llvm %s &> mult_dense_ij-ikj-kj.llvm
4+
# RUN: comet-opt --target=GPU --convert-ta-to-it --convert-to-loops --convert-to-triton --convert-to-llvm %s &> mult_dense_ij-ikj-kj.llvm
55
# RUN: mlir-cpu-runner mult_dense_ij-ikj-kj.llvm -O3 -e main -entry-point-result=void -shared-libs=%comet_utility_library_dir/libcomet_runner_utils%shlibext | FileCheck %s
66

77
def main() {
@@ -27,4 +27,5 @@ def main() {
2727

2828
# Print the result for verification.
2929
# CHECK: data =
30-
# CHECK-NEXT: 21.76,21.76,21.76,21.76,21.76,21.76,21.76,21.76,21.76,21.76,21.76,21.76,21.76,21.76,21.76,21.76,
30+
# CHECK-NEXT: 21.76,21.76,21.76,21.76,21.76,21.76,21.76,21.76,21.76,21.76,21.76,21.76,21.76,21.76,21.76,21.76,
31+
# REQUIRES: gpu_target_enabled

integration_test/ops/gpu/mult_dense_matrix.ta

+3-2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# RUN: comet-opt --target=GPU --gpu-compute-capability=70 --convert-ta-to-it --convert-to-loops --convert-to-triton --convert-to-llvm %s &> mult_dense_matrix.llvm
1+
# RUN: comet-opt --target=GPU --convert-ta-to-it --convert-to-loops --convert-to-triton --convert-to-llvm %s &> mult_dense_matrix.llvm
22
# RUN: mlir-cpu-runner mult_dense_matrix.llvm -O3 -e main -entry-point-result=void -shared-libs=%comet_utility_library_dir/libcomet_runner_utils%shlibext | FileCheck %s
33

44

@@ -24,4 +24,5 @@ def main() {
2424

2525
# Print the result for verification.
2626
# CHECK: data =
27-
# CHECK-NEXT: 29.92,29.92,29.92,29.92,29.92,29.92,29.92,29.92,29.92,29.92,29.92,29.92,29.92,29.92,29.92,29.92,
27+
# CHECK-NEXT: 29.92,29.92,29.92,29.92,29.92,29.92,29.92,29.92,29.92,29.92,29.92,29.92,29.92,29.92,29.92,29.92,
28+
# REQUIRES: gpu_target_enabled

integration_test/ops/gpu/mult_dense_matrix_vector.ta

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# RUN: comet-opt --target=GPU --gpu-compute-capability=70 --convert-ta-to-it --convert-to-loops --convert-to-triton --convert-to-llvm %s &> mult_dense_matrix_vector.llvm
1+
# RUN: comet-opt --target=GPU --convert-ta-to-it --convert-to-loops --convert-to-triton --convert-to-llvm %s &> mult_dense_matrix_vector.llvm
22
# RUN: mlir-cpu-runner mult_dense_matrix_vector.llvm -O3 -e main -entry-point-result=void -shared-libs=%comet_utility_library_dir/libcomet_runner_utils%shlibext | FileCheck %s
33

44
def main() {
@@ -23,4 +23,4 @@ def main() {
2323
# Print the result for verification.
2424
# CHECK: data =
2525
# CHECK-NEXT: 136.16,136.16,136.16,136.16,136.16,136.16,136.16,136.16,
26-
26+
# REQUIRES: gpu_target_enabled

integration_test/ops/gpu/sum_dense_matrix.ta

+3-2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# RUN: comet-opt --target=GPU --gpu-compute-capability=70 --convert-ta-to-it --convert-to-loops --convert-to-triton --convert-to-llvm %s &> sum_dense_matrix.llvm
1+
# RUN: comet-opt --target=GPU --convert-ta-to-it --convert-to-loops --convert-to-triton --convert-to-llvm %s &> sum_dense_matrix.llvm
22
# RUN: mlir-cpu-runner sum_dense_matrix.llvm -O3 -e main -entry-point-result=void -shared-libs=%comet_utility_library_dir/libcomet_runner_utils%shlibext | FileCheck %s
33

44
def main() {
@@ -19,4 +19,5 @@ def main() {
1919

2020
# Print the result for verification.
2121
# CHECK: data =
22-
# CHECK-NEXT: 59.2,
22+
# CHECK-NEXT: 59.2,
23+
# REQUIRES: gpu_target_enabled

integration_test/ops/gpu/transpose_dense_matrix.ta

+3-2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# RUN: comet-opt --target=GPU --gpu-compute-capability=70 --convert-ta-to-it --convert-to-loops --convert-to-triton --convert-to-llvm %s &> transpose_dense_matrix.llvm
1+
# RUN: comet-opt --target=GPU --convert-ta-to-it --convert-to-loops --convert-to-triton --convert-to-llvm %s &> transpose_dense_matrix.llvm
22
# RUN: mlir-cpu-runner transpose_dense_matrix.llvm -O3 -e main -entry-point-result=void -shared-libs=%comet_utility_library_dir/libcomet_runner_utils%shlibext | FileCheck %s
33

44
#TODO(gkestor): read dense input from file
@@ -22,4 +22,5 @@ def main() {
2222

2323
# Print the result for verification.
2424
# CHECK: data =
25-
# CHECK-NEXT: 3.2,3.2,3.2,3.2,3.2,3.2,3.2,3.2,3.2,3.2,3.2,3.2,3.2,3.2,3.2,3.2,
25+
# CHECK-NEXT: 3.2,3.2,3.2,3.2,3.2,3.2,3.2,3.2,3.2,3.2,3.2,3.2,3.2,3.2,3.2,3.2,
26+
# REQUIRES: gpu_target_enabled
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# RUN: comet-opt --target=GPU --convert-ta-to-it --convert-to-loops --convert-to-triton --convert-to-llvm %s &> eltwise_monoidMin_DensexDense_oDense.llvm
2+
# RUN: mlir-cpu-runner eltwise_monoidMin_DensexDense_oDense.llvm -O3 -e main -entry-point-result=void -shared-libs=%comet_utility_library_dir/libcomet_runner_utils%shlibext | FileCheck %s
3+
4+
5+
def main() {
6+
#IndexLabel Declarations
7+
IndexLabel [a] = [4];
8+
IndexLabel [b] = [4];
9+
10+
#Tensor Declarations
11+
Tensor<double> A([a, b], {Dense});
12+
Tensor<double> B([a, b], {Dense});
13+
Tensor<double> C([a, b], {Dense});
14+
15+
#Tensor Fill Operation
16+
A[a, b] = 2.7;
17+
B[a, b] = 3.2;
18+
19+
#If output tensor is dense, it needs to be initialized to 0
20+
C[a, b] = 0.0;
21+
22+
#Tensor Contraction
23+
C[a, b] = A[a, b] @(min) B[a, b];
24+
25+
print(C);
26+
}
27+
28+
# Print the result for verification.
29+
# CHECK: data =
30+
# CHECK-NEXT: 2.7,2.7,2.7,2.7,2.7,2.7,2.7,2.7,2.7,2.7,2.7,2.7,2.7,2.7,2.7,2.7,
31+
# REQUIRES: gpu_target_enabled
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# RUN: comet-opt --target=GPU --convert-ta-to-it --convert-to-loops --convert-to-triton --convert-to-llvm %s &> eltwise_monoidMinus_DensexDense_oDense.llvm
2+
# RUN: mlir-cpu-runner eltwise_monoidMinus_DensexDense_oDense.llvm -O3 -e main -entry-point-result=void -shared-libs=%comet_utility_library_dir/libcomet_runner_utils%shlibext | FileCheck %s
3+
4+
5+
def main() {
6+
#IndexLabel Declarations
7+
IndexLabel [a] = [4];
8+
IndexLabel [b] = [4];
9+
10+
#Tensor Declarations
11+
Tensor<double> A([a, b], {Dense});
12+
Tensor<double> B([a, b], {Dense});
13+
Tensor<double> C([a, b], {Dense});
14+
15+
#Tensor Fill Operation
16+
A[a, b] = 4.2;
17+
B[a, b] = 2.7;
18+
19+
#If output tensor is dense, it needs to be initialized to 0
20+
C[a, b] = 0.0;
21+
22+
#Tensor Contraction
23+
C[a, b] = A[a, b] @(-) B[a, b];
24+
25+
print(C);
26+
}
27+
28+
# Print the result for verification.
29+
# CHECK: data =
30+
# CHECK-NEXT: 1.5,1.5,1.5,1.5,1.5,1.5,1.5,1.5,1.5,1.5,1.5,1.5,1.5,1.5,1.5,1.5,
31+
# REQUIRES: gpu_target_enabled
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# RUN: comet-opt --target=GPU --convert-ta-to-it --convert-to-loops --convert-to-triton --convert-to-llvm %s &> eltwise_monoidPlus_DensexDense_oDense.llvm
2+
# RUN: mlir-cpu-runner eltwise_monoidPlus_DensexDense_oDense.llvm -O3 -e main -entry-point-result=void -shared-libs=%comet_utility_library_dir/libcomet_runner_utils%shlibext | FileCheck %s
3+
4+
5+
def main() {
6+
#IndexLabel Declarations
7+
IndexLabel [a] = [4];
8+
IndexLabel [b] = [4];
9+
10+
#Tensor Declarations
11+
Tensor<double> A([a, b], {Dense});
12+
Tensor<double> B([a, b], {Dense});
13+
Tensor<double> C([a, b], {Dense});
14+
15+
#Tensor Fill Operation
16+
A[a, b] = 2.7;
17+
B[a, b] = 3.2;
18+
19+
#If output tensor is dense, it needs to be initialized to 0
20+
C[a, b] = 0.0;
21+
22+
#Tensor Contraction
23+
C[a, b] = A[a, b] @(+) B[a, b];
24+
25+
print(C);
26+
}
27+
28+
# Print the result for verification.
29+
# CHECK: data =
30+
# CHECK-NEXT: 5.9,5.9,5.9,5.9,5.9,5.9,5.9,5.9,5.9,5.9,5.9,5.9,5.9,5.9,5.9,5.9,
31+
# REQUIRES: gpu_target_enabled

0 commit comments

Comments
 (0)