Skip to content

Commit b3b0aa9

Browse files
committed
[vulkan] Add integer dot product (4xint8, 4xuint8) tensorization for the vulkan SPIR-V target. Currently only autotvm path is supported.
Prerequisites for compilation: (1) Use VulkanSDK 1.2.198 release with SPIR-V integer dot product support (2) set(USE_SPIRV_KHR_INTEGER_DOT_PRODUCT ON) in config.cmake and build (3) Use a driver that supports the VK_KHR_shader_integer_dot_product extension. The compiled binary can only be run on hardware that supports the relevant ISA. This work is tested on AMD RDNA2 families (e.g., Rembrandt and RX6800). To compile on a device that supports this extension, use target: vulkan -from_device=0. To compile on a device that supports int8 but does not support this extension, add "-supports_integer_dot_product=1" or "-mattr=+dotprod" to the target string. To support pre-released Vulkan and SPIR-V extensions, we need the SPIR-V tool and header file from the Khronos github; use the option USE_KHRONOS_SPIRV in config.cmake.
1 parent 16e9491 commit b3b0aa9

File tree

24 files changed

+403
-160
lines changed

24 files changed

+403
-160
lines changed

CMakeLists.txt

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,15 @@ endif()
2626
tvm_option(USE_CUDA "Build with CUDA" OFF)
2727
tvm_option(USE_OPENCL "Build with OpenCL" OFF)
2828
tvm_option(USE_VULKAN "Build with Vulkan" OFF)
29+
30+
# Whether to use spirv-tools.and SPIRV-Headers from Khronos github or gitlab.
31+
#
32+
# Possible values:
33+
# - OFF: not to use
34+
# - /path/to/install: path to your khronis spirv-tools and SPIRV-Headers installation directory
35+
#
36+
tvm_option(USE_KHRONOS_SPIRV "Whether to use spirv-tools.and SPIRV-Headers from Khronos github or gitlab" OFF)
37+
tvm_option(USE_SPIRV_KHR_INTEGER_DOT_PRODUCT "whether enable SPIRV_KHR_DOT_PRODUCT" OFF)
2938
tvm_option(USE_METAL "Build with Metal" OFF)
3039
tvm_option(USE_ROCM "Build with ROCM" OFF)
3140
tvm_option(ROCM_PATH "The path to rocm" /opt/rocm)

cmake/config.cmake

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,18 @@ set(USE_METAL OFF)
8181
# - /path/to/vulkan-sdk: use specific path to vulkan-sdk
8282
set(USE_VULKAN OFF)
8383

84+
85+
# Whether to use spirv-tools.and SPIRV-Headers from Khronos github or gitlab.
86+
#
87+
# Possible values:
88+
# - OFF: not to use
89+
# - /path/to/install: path to your khronis spirv-tools and SPIRV-Headers installation directory
90+
#
91+
set(USE_KHRONOS_SPIRV OFF)
92+
93+
# whether enable SPIRV_KHR_DOT_PRODUCT
94+
set(USE_SPIRV_KHR_INTEGER_DOT_PRODUCT OFF)
95+
8496
# Whether enable OpenGL runtime
8597
set(USE_OPENGL OFF)
8698

cmake/modules/Vulkan.cmake

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,16 @@
1616
# under the License.
1717

1818
# Be compatible with older version of CMake
19-
find_vulkan(${USE_VULKAN})
19+
find_vulkan(${USE_VULKAN} ${USE_KHRONOS_SPIRV})
2020

2121
if(USE_VULKAN)
2222
if(NOT Vulkan_FOUND)
2323
message(FATAL_ERROR "Cannot find Vulkan, USE_VULKAN=" ${USE_VULKAN})
2424
endif()
25+
if (USE_SPIRV_KHR_INTEGER_DOT_PRODUCT)
26+
add_definitions(-DTVM_SPIRV_KHR_INTEGER_DOT_PRODUCT=1)
27+
message(STATUS "Enable SPIRV_KHR_INTEGER_DOT_PRODUCT")
28+
endif()
2529
include_directories(SYSTEM ${Vulkan_INCLUDE_DIRS})
2630
message(STATUS "Build with Vulkan support")
2731
tvm_file_glob(GLOB RUNTIME_VULKAN_SRCS src/runtime/vulkan/*.cc)

cmake/utils/FindVulkan.cmake

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@
3232
# - Vulkan_SPIRV_TOOLS_LIBRARY
3333
#
3434

35-
macro(find_vulkan use_vulkan)
35+
macro(find_vulkan use_vulkan use_khronos_spirv)
3636
set(__use_vulkan ${use_vulkan})
3737
if(IS_DIRECTORY ${__use_vulkan})
3838
set(__vulkan_sdk ${__use_vulkan})
@@ -43,6 +43,15 @@ macro(find_vulkan use_vulkan)
4343
set(__vulkan_sdk "")
4444
endif()
4545

46+
47+
if(IS_DIRECTORY ${use_khronos_spirv})
48+
set(__use_khronos_spirv ${use_khronos_spirv})
49+
message(STATUS "Custom khronos spirv PATH=" ${__use_khronos_spirv})
50+
else()
51+
set(__use_khronos_spirv "")
52+
endif()
53+
54+
4655
if(__vulkan_sdk)
4756
set(Vulkan_INCLUDE_DIRS ${__vulkan_sdk}/include)
4857
find_library(Vulkan_LIBRARY NAMES vulkan vulkan-1 PATHS ${__vulkan_sdk}/lib)
@@ -61,11 +70,18 @@ macro(find_vulkan use_vulkan)
6170

6271
if(Vulkan_FOUND)
6372
get_filename_component(VULKAN_LIBRARY_PATH ${Vulkan_LIBRARY} DIRECTORY)
64-
find_library(Vulkan_SPIRV_TOOLS_LIBRARY SPIRV-Tools
65-
HINTS ${VULKAN_LIBRARY_PATH} ${VULKAN_LIBRARY_PATH}/spirv-tools ${VULKAN_SDK}/lib)
73+
if (WIN32)
74+
find_library(Vulkan_SPIRV_TOOLS_LIBRARY SPIRV-Tools
75+
HINTS ${__use_khronos_spirv}/spirv-tools/lib ${VULKAN_LIBRARY_PATH} ${VULKAN_LIBRARY_PATH}/spirv-tools ${VULKAN_SDK}/lib)
76+
find_path(_libspirv libspirv.h HINTS ${__use_khronos_spirv}/spirv-tools/include ${Vulkan_INCLUDE_DIRS} PATH_SUFFIXES vulkan spirv-tools)
77+
find_path(_spirv spirv.hpp HINTS ${__use_khronos_spirv}/SPIRV-Headers/include ${Vulkan_INCLUDE_DIRS} PATH_SUFFIXES vulkan SPIRV spirv/unified1 spirv-headers)
78+
else()
79+
find_library(Vulkan_SPIRV_TOOLS_LIBRARY SPIRV-Tools
80+
HINTS ${__use_khronos_spirv}/lib ${VULKAN_LIBRARY_PATH} ${VULKAN_LIBRARY_PATH}/spirv-tools ${VULKAN_SDK}/lib)
81+
find_path(_libspirv libspirv.h HINTS ${__use_khronos_spirv}/include ${Vulkan_INCLUDE_DIRS} PATH_SUFFIXES vulkan spirv-tools)
82+
find_path(_spirv spirv.hpp HINTS ${__use_khronos_spirv}/include ${Vulkan_INCLUDE_DIRS} PATH_SUFFIXES vulkan SPIRV spirv/unified1 spirv-headers)
83+
endif()
6684

67-
find_path(_libspirv libspirv.h HINTS ${Vulkan_INCLUDE_DIRS} PATH_SUFFIXES vulkan spirv-tools)
68-
find_path(_spirv spirv.hpp HINTS ${Vulkan_INCLUDE_DIRS} PATH_SUFFIXES vulkan SPIRV spirv/unified1 spirv-headers)
6985
find_path(_glsl_std GLSL.std.450.h HINTS ${Vulkan_INCLUDE_DIRS} PATH_SUFFIXES vulkan SPIRV spirv/unified1 spirv-headers)
7086
list(APPEND Vulkan_INCLUDE_DIRS ${_libspirv} ${_spirv} ${_glsl_std})
7187
message(STATUS "Vulkan_INCLUDE_DIRS=" ${Vulkan_INCLUDE_DIRS})

python/tvm/relay/op/strategy/cuda.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,7 @@ def conv2d_strategy_cuda(attrs, inputs, out_type, target):
145145
if layout == "NCHW":
146146
assert kernel_layout == "OIHW"
147147
if (
148-
target.kind.name == "cuda"
148+
(target.kind.name in ["cuda", "vulkan"])
149149
and data.dtype in ("int8", "uint8")
150150
and kernel.dtype in ("int8", "uint8")
151151
):
@@ -296,7 +296,11 @@ def conv2d_strategy_cuda(attrs, inputs, out_type, target):
296296
"Unsupported shape for conv2d HWNC.\
297297
Need to satisfy tensor core schedule."
298298
)
299-
elif target.kind.name == "cuda" and layout == "NCHW4c" and data.dtype in ["int8", "uint8"]:
299+
elif (
300+
(target.kind.name in ["cuda", "vulkan"])
301+
and layout == "NCHW4c"
302+
and data.dtype in ["int8", "uint8"]
303+
):
300304
assert kernel_layout == "OIHW4o4i"
301305
strategy.add_implementation(
302306
wrap_compute_conv2d(topi.cuda.conv2d_NCHWc_int8, True),
@@ -372,7 +376,7 @@ def conv2d_strategy_cuda(attrs, inputs, out_type, target):
372376
ic_chunk = in_channels // 4
373377

374378
if (
375-
target.kind.name == "cuda"
379+
(target.kind.name in ["cuda", "vulkan"])
376380
and data.dtype in ["int8", "uint8"]
377381
and kernel.dtype in ["int8", "uint8"]
378382
and channels % groups == 0

python/tvm/relay/qnn/op/legalizations.py

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -387,6 +387,12 @@ def is_aarch64_arm():
387387
return "aarch64" in target.attrs.get("mtriple", "")
388388

389389

390+
def is_vulkan():
391+
"""Checks whether we are compiling for a vulkan/spirv target."""
392+
target = tvm.target.Target.current(allow_none=False)
393+
return "vulkan" in target.keys
394+
395+
390396
########################
391397
# ARM CPU legalizations.
392398
########################
@@ -438,17 +444,23 @@ def _qnn_dense_legalize_intel_cpu(attrs, inputs, types):
438444

439445

440446
#####################
441-
# CUDA legalizations.
447+
# CUDA and vulkan legalizations.
442448
#####################
443449

444450

445-
@qnn_conv2d_legalize.register("cuda")
451+
@qnn_conv2d_legalize.register(["cuda", "gpu"])
446452
def _qnn_conv2d_legalize_cuda(attrs, inputs, types):
453+
if is_vulkan():
454+
# prefers the dtypes to be same. Mixed type is not yet supported.
455+
return helper_change_dtypes_to_be_same(attrs, inputs, types, relay.qnn.op.conv2d)
447456
# CUDA prefers both datatypes to be int8.
448457
return helper_change_dtypes_to_int8(attrs, inputs, types, relay.qnn.op.conv2d)
449458

450459

451-
@qnn_dense_legalize.register("cuda")
460+
@qnn_dense_legalize.register(["cuda", "gpu"])
452461
def _qnn_dense_legalize_cuda(attrs, inputs, types):
462+
if is_vulkan():
463+
# prefers the dtypes to be same. Mixed type is not yet supported.
464+
return helper_change_dtypes_to_be_same(attrs, inputs, types, relay.qnn.op.dense)
453465
# CUDA prefers both datatypes to be the int8.
454466
return helper_change_dtypes_to_int8(attrs, inputs, types, relay.qnn.op.dense)

python/tvm/target/target.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,12 @@ def mattr(self):
188188
"""Returns the mattr from the target if it exists."""
189189
return list(self.attrs.get("mattr", []))
190190

191+
@property
192+
def supports_integer_dot_product(self):
193+
if self.attrs.get("supports_integer_dot_product", []):
194+
return bool(self.attrs["supports_integer_dot_product"])
195+
return False
196+
191197
@property
192198
def libs(self):
193199
return list(self.attrs.get("libs", []))

python/tvm/topi/cuda/conv2d_alter_op.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -83,14 +83,18 @@ def _alter_conv2d_layout(attrs, inputs, tinfos, out_type):
8383
cfg = dispatch_ctx.query(target, workload)
8484
if cfg.is_fallback: # if is fallback, clear query cache and return None
8585
autotvm.task.clear_fallback_cache(target, workload)
86-
return None
86+
do_new_layout = False
87+
if "vulkan" in target.keys:
88+
do_new_layout = "+dotprod" in target.mattr or target.supports_integer_dot_product
89+
if not do_new_layout:
90+
return None
8791

8892
topi_tmpl = workload[0]
8993
if topi_tmpl == "conv2d_NCHWc_int8.cuda":
9094
assert data_layout == "NCHW" and kernel_layout == "OIHW"
9195
N, CI, H, W = get_const_tuple(data.shape)
9296
CO, _, KH, KW = get_const_tuple(kernel.shape)
93-
97+
assert CO % 4 == 0, "Number of output channels should be multiple of 4"
9498
new_layout = "NCHW4c"
9599
new_attrs["channels"] = CO
96100
new_attrs["data_layout"] = new_layout
@@ -324,7 +328,7 @@ def _pad_conv2d_NHWC(db, di, do, data, kernel, out_channel, new_attrs, output_te
324328
return out
325329

326330

327-
@conv2d_legalize.register("cuda")
331+
@conv2d_legalize.register(["cuda", "gpu"])
328332
def _conv2d_legalize(attrs, inputs, arg_types):
329333
"""Legalizes Conv2D op.
330334

python/tvm/topi/cuda/conv2d_int8.py

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -153,13 +153,15 @@ def conv2d_NCHWc_int8(cfg, data, kernel, stride, padding, dilation, layout, out_
153153
kh = te.reduce_axis((0, kernel_h), name="kh")
154154
kw = te.reduce_axis((0, kernel_w), name="kw")
155155

156+
packed_kernel_dtype = packed_kernel.dtype
157+
packed_dtype = "int32" if packed_kernel_dtype == "int8" else "uint32"
156158
conv = te.compute(
157159
oshape,
158160
lambda n, oc_chunk, oh, ow, oc_block: te.sum(
159161
pad_data[
160162
n, icc, oh * stride_h + kh * dilation_h, ow * stride_w + kw * dilation_w, icb
161-
].astype("int32")
162-
* packed_kernel[oc_chunk, icc, kh, kw, oc_block, icb].astype("int32"),
163+
].astype(packed_dtype)
164+
* packed_kernel[oc_chunk, icc, kh, kw, oc_block, icb].astype(packed_dtype),
163165
axis=[icc, kh, kw, icb],
164166
),
165167
)
@@ -188,9 +190,6 @@ def conv2d_NCHWc_int8(cfg, data, kernel, stride, padding, dilation, layout, out_
188190
return output
189191

190192

191-
_dp4a = dp4a("shared", "shared", "local")
192-
193-
194193
@autotvm.register_topi_schedule("conv2d_NCHWc_int8.cuda")
195194
def schedule_conv2d_NCHWc_int8(cfg, outs):
196195
"""Schedule conv2d int8 NCHWc template"""
@@ -311,7 +310,14 @@ def _schedule_conv2d_NCHWc_int8(cfg, s, output):
311310
cfg["reorder_inner"].apply(s, conv, [rci, ryi, rxi])
312311

313312
_, rc_block = s[conv].split(rc_block, factor=4)
314-
s[conv].tensorize(rc_block, _dp4a)
313+
target = tvm.target.Target.current(allow_none=False)
314+
do_tensorize = True
315+
if "vulkan" in target.keys:
316+
do_tensorize = "+dotprod" in target.mattr or target.supports_integer_dot_product
317+
318+
if do_tensorize:
319+
dtypes = (pad_data.dtype, packed_kernel.dtype)
320+
s[conv].tensorize(rc_block, dp4a("shared", "shared", "local", dtypes))
315321

316322
cache_loc = [rco, ryo, rxo][cfg["reorder_inner"].perm[-1]]
317323
s[AA].compute_at(s[conv], cache_loc)

python/tvm/topi/cuda/dense.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
# pylint: disable=invalid-name, unused-argument
1818
"""Schedule for dense operator"""
1919
import logging
20+
import tvm
2021
from tvm import te
2122
import tvm.autotvm as autotvm
2223
from tvm.contrib import cublas
@@ -133,9 +134,6 @@ def _callback(op):
133134
return s
134135

135136

136-
_dp4a = dp4a("shared", "shared", "local")
137-
138-
139137
def _schedule_dense_int8(cfg, s, output):
140138
data, weight = s[output].op.input_tensors
141139
if len(weight.op.input_tensors) == 1 and weight.op.input_tensors[0] == data:
@@ -173,7 +171,14 @@ def _schedule_dense_int8(cfg, s, output):
173171
ko = CC.op.reduce_axis[0]
174172
ko, ki = s[CC].split(ko, factor=4)
175173
ko, kt = cfg["tile_k"].apply(s, CC, ko)
176-
s[CC].tensorize(ki, _dp4a)
174+
target = tvm.target.Target.current(allow_none=False)
175+
if (
176+
"vulkan" not in target.keys
177+
or "+dotprod" in target.mattr
178+
or target.supports_integer_dot_product
179+
):
180+
dtypes = (data.dtype, weight.dtype)
181+
s[CC].tensorize(ki, dp4a("shared", "shared", "local", dtypes))
177182
by, vy, ty, yi = cfg["tile_y"].apply(s, output, n)
178183
bx, vx, tx, xi = cfg["tile_x"].apply(s, output, x)
179184

0 commit comments

Comments
 (0)