diff --git a/backends/vulkan/runtime/api/Tensor.cpp b/backends/vulkan/runtime/api/Tensor.cpp
index bffe00c836b..ebd78eac20c 100644
--- a/backends/vulkan/runtime/api/Tensor.cpp
+++ b/backends/vulkan/runtime/api/Tensor.cpp
@@ -13,80 +13,6 @@ namespace vkcompute {
 
 namespace {
 
-/*
- * Calculates the strides of a contiguous tensor. empty_tensor_restride from
- * TensorImpl.h was used as a reference.
- */
-std::vector<int64_t> calc_contiguous_strides(
-    const std::vector<int64_t>& sizes) {
-  int64_t ndim = static_cast<int64_t>(sizes.size());
-  std::vector<int64_t> strides(ndim);
-
-  int64_t running_product = 1;
-  if (ndim >= 1) {
-    strides.at(ndim - 1) = running_product;
-    for (int i = static_cast<int>(sizes.size()) - 2; i >= 0; --i) {
-      running_product *= sizes.at(i + 1);
-      strides.at(i) = running_product;
-    }
-  }
-
-  return strides;
-}
-
-std::vector<int64_t> calc_channels_last_strides(
-    const std::vector<int64_t>& sizes) {
-  std::vector<int64_t> strides(sizes.size());
-
-  switch (sizes.size()) {
-    case 4:
-      strides.at(1) = 1;
-      strides.at(3) = sizes.at(1);
-      strides.at(2) = strides.at(3) * sizes.at(3);
-      strides.at(0) = strides.at(2) * sizes.at(2);
-      return strides;
-    case 3:
-      strides.at(0) = 1;
-      strides.at(2) = sizes.at(0);
-      strides.at(1) = strides.at(2) * sizes.at(2);
-      return strides;
-    default:
-      VK_THROW("ChannelsLast format only available for 3 <= ndim <= 4!");
-  }
-
-  return strides;
-}
-
-/*
- * Calculates the strides of a tensor based on the sizes and memory format. Note
- * that strides are only valid for vTensors that are backed by buffer storage;
- * if texture storage is used then the strides are invalid and set to zeros.
- */
-std::vector<int64_t> calc_strides(
-    const std::vector<int64_t>& sizes,
-    const api::GPUMemoryLayout memory_layout,
-    const api::StorageType storage_type) {
-  switch (storage_type) {
-    case api::kBuffer:
-      switch (memory_layout) {
-        case api::kWidthPacked:
-          return calc_contiguous_strides(sizes);
-          break;
-        case api::kChannelsPacked:
-          return calc_channels_last_strides(sizes);
-          break;
-        default:
-          VK_THROW("Invalid memory format used to create vTensor!");
-      }
-      break;
-    case api::kTexture3D:
-    case api::kTexture2D:
-      return std::vector<int64_t>(sizes.size());
-    default:
-      VK_THROW("Invalid storage type used to create vTensor!");
-  }
-}
-
 /*
  * When stored on the GPU, one dimension will be aligned to the next multiple of
  * 4 in order to take advantage of vec4 data types. The dimension that is
@@ -176,11 +102,11 @@ api::utils::uvec3 create_image_extents(
 
   switch (memory_layout) {
     case api::kWidthPacked:
-      VK_CHECK_COND(width % 4 == 0, "Channels must be divisible by 4!");
+      VK_CHECK_COND(width % 4 == 0, "Width must be divisible by 4!");
       width /= 4;
       break;
     case api::kHeightPacked:
-      VK_CHECK_COND(height % 4 == 0, "Channels must be divisible by 4!");
+      VK_CHECK_COND(height % 4 == 0, "Height must be divisible by 4!");
       height /= 4;
       break;
    case api::kChannelsPacked:
@@ -212,23 +138,19 @@ vTensor::vTensor(
       memory_layout_(memory_layout),
       // Calculate sizes and strides
       sizes_(sizes.begin(), sizes.end()),
-      strides_{calc_strides(sizes, memory_layout_, storage_type)},
       gpu_sizes_{calc_gpu_sizes(sizes, memory_layout_, storage_type)},
-      gpu_strides_{calc_strides(gpu_sizes_, memory_layout_, storage_type)},
-      virtual_extents_(
-          create_image_extents(gpu_sizes_, storage_type, memory_layout)),
       // Utility Uniform Buffers that can be passed to shaders as arguments
       cpu_sizes_uniform_(nullptr),
       gpu_sizes_uniform_(nullptr),
       extents_uniform_(nullptr),
       // Construct Tensor storage
-      view_(std::make_shared<vTensorStorage>(
+      storage_(
           context,
           storage_type,
           memory_layout_,
           gpu_sizes_,
           dtype_,
-          allocate_memory)) {
+          allocate_memory) {
   if (dtype == api::kHalf) {
     VK_CHECK_COND(
         api::context()->adapter_ptr()->has_16bit_storage(),
@@ -237,73 +159,40 @@ vTensor::vTensor(
   }
 }
 
-vTensor::vTensor(
-    api::Context* const context,
-    const std::vector<int64_t>& sizes,
-    double q_scale,
-    int64_t q_zero_point,
-    const api::ScalarType dtype,
-    const api::StorageType storage_type,
-    const api::GPUMemoryLayout memory_layout)
-    : dtype_(dtype),
-      memory_layout_(memory_layout),
-      // Calculate sizes and strides
-      sizes_(sizes.begin(), sizes.end()),
-      strides_{calc_strides(sizes, memory_layout_, storage_type)},
-      gpu_sizes_{calc_gpu_sizes(sizes, memory_layout_, storage_type)},
-      gpu_strides_{calc_strides(gpu_sizes_, memory_layout_, storage_type)},
-      virtual_extents_(
-          create_image_extents(gpu_sizes_, storage_type, memory_layout)),
-      // Vulkan uniform buffer containing sizes and stride info
-      cpu_sizes_uniform_(nullptr),
-      gpu_sizes_uniform_(nullptr),
-      extents_uniform_(nullptr),
-      // Quantization params
-      is_quantized_{true},
-      q_scale_{q_scale},
-      q_zero_point_{q_zero_point},
-      // Construct Tensor storage
-      view_(std::make_shared<vTensorStorage>(
-          context,
-          storage_type,
-          memory_layout_,
-          gpu_sizes_,
-          dtype_)) {}
-
 api::VulkanImage& vTensor::image(
     api::PipelineBarrier& pipeline_barrier,
-    const api::PipelineStageFlags stage) const& {
-  view_->transition(pipeline_barrier, stage, api::MemoryAccessType::READ);
-  return view_->image_;
+    const api::PipelineStageFlags stage) & {
+  storage_.transition(pipeline_barrier, stage, api::MemoryAccessType::READ);
+  return storage_.image_;
 }
 
 api::VulkanImage& vTensor::image(
     api::PipelineBarrier& pipeline_barrier,
     const api::PipelineStageFlags stage,
     const api::MemoryAccessFlags access) & {
-  view_->transition(pipeline_barrier, stage, access);
-  return view_->image_;
+  storage_.transition(pipeline_barrier, stage, access);
+  return storage_.image_;
 }
 
 api::VulkanBuffer& vTensor::buffer(
     api::PipelineBarrier& pipeline_barrier,
-    const api::PipelineStageFlags stage) const& {
-  view_->transition(pipeline_barrier, stage, api::MemoryAccessType::READ);
-  return view_->buffer_;
+    const api::PipelineStageFlags stage) & {
+  storage_.transition(pipeline_barrier, stage, api::MemoryAccessType::READ);
+  return storage_.buffer_;
 }
 
 api::VulkanBuffer& vTensor::buffer(
     api::PipelineBarrier& pipeline_barrier,
     const api::PipelineStageFlags stage,
     const api::MemoryAccessFlags access) & {
-  view_->transition(pipeline_barrier, stage, access);
-  return view_->buffer_;
+  storage_.transition(pipeline_barrier, stage, access);
+  return storage_.buffer_;
 }
 
 std::shared_ptr<api::UniformParamsBuffer> vTensor::cpu_sizes_ubo() {
   if (!cpu_sizes_uniform_) {
     cpu_sizes_uniform_.reset(new api::UniformParamsBuffer(
-        view_->context_, api::utils::make_whcn_ivec4(sizes_)));
+        storage_.context_, api::utils::make_whcn_ivec4(sizes_)));
   }
   return cpu_sizes_uniform_;
 }
@@ -311,7 +200,7 @@ std::shared_ptr<api::UniformParamsBuffer> vTensor::cpu_sizes_ubo() {
 std::shared_ptr<api::UniformParamsBuffer> vTensor::gpu_sizes_ubo() {
   if (!gpu_sizes_uniform_) {
     gpu_sizes_uniform_.reset(new api::UniformParamsBuffer(
-        view_->context_, api::utils::make_whcn_ivec4(gpu_sizes_)));
+        storage_.context_, api::utils::make_whcn_ivec4(gpu_sizes_)));
   }
   return gpu_sizes_uniform_;
 }
@@ -319,11 +208,11 @@ std::shared_ptr<api::UniformParamsBuffer> vTensor::gpu_sizes_ubo() {
 std::shared_ptr<api::UniformParamsBuffer> vTensor::extents_ubo() {
   if (!extents_uniform_) {
     extents_uniform_.reset(new api::UniformParamsBuffer(
-        view_->context_,
+        storage_.context_,
         api::utils::uvec4(
-            {view_->extents_.data[0],
-             view_->extents_.data[1],
-             view_->extents_.data[2],
+            {storage_.extents_.data[0],
+             storage_.extents_.data[1],
+             storage_.extents_.data[2],
              1u})));
   }
   return extents_uniform_;
@@ -332,10 +221,10 @@ std::shared_ptr<api::UniformParamsBuffer> vTensor::extents_ubo() {
 VmaAllocationCreateInfo vTensor::get_allocation_create_info() const {
   switch (storage_type()) {
     case api::kBuffer:
-      return view_->buffer_.allocation_create_info();
+      return storage_.buffer_.allocation_create_info();
     case api::kTexture2D:
     case api::kTexture3D:
-      return view_->image_.allocation_create_info();
+      return storage_.image_.allocation_create_info();
   }
   return {};
 }
@@ -343,10 +232,10 @@ VmaAllocationCreateInfo vTensor::get_allocation_create_info() const {
 VkMemoryRequirements vTensor::get_memory_requirements() const {
   switch (storage_type()) {
     case api::kBuffer:
-      return view_->buffer_.get_memory_requirements();
+      return storage_.buffer_.get_memory_requirements();
     case api::kTexture2D:
     case api::kTexture3D:
-      return view_->image_.get_memory_requirements();
+      return storage_.image_.get_memory_requirements();
   }
   return {};
 }
@@ -354,11 +243,11 @@ VkMemoryRequirements vTensor::get_memory_requirements() const {
 void vTensor::bind_allocation(const api::MemoryAllocation& allocation) {
   switch (storage_type()) {
     case api::kBuffer:
-      view_->buffer_.bind_allocation(allocation);
+      storage_.buffer_.bind_allocation(allocation);
       break;
     case api::kTexture2D:
     case api::kTexture3D:
-      view_->image_.bind_allocation(allocation);
+      storage_.image_.bind_allocation(allocation);
       break;
   }
 }
@@ -366,7 +255,7 @@ void vTensor::bind_allocation(const api::MemoryAllocation& allocation) {
 void vTensor::update_size_metadata(const std::vector<int64_t>& new_sizes) {
   sizes_ = new_sizes;
   gpu_sizes_ = calc_gpu_sizes(sizes_, memory_layout_, storage_type());
-  virtual_extents_ =
+  api::utils::uvec3 virtual_extents =
       create_image_extents(gpu_sizes_, storage_type(), memory_layout_);
 
   if (cpu_sizes_uniform_) {
@@ -379,16 +268,16 @@ void vTensor::update_size_metadata(const std::vector<int64_t>& new_sizes) {
 
   if (extents_uniform_) {
     extents_uniform_->update(api::utils::uvec4(
-        {virtual_extents_.data[0],
-         virtual_extents_.data[1],
-         virtual_extents_.data[2],
+        {virtual_extents.data[0],
+         virtual_extents.data[1],
+         virtual_extents.data[2],
          1u}));
   }
 }
 
 void vTensor::reallocate(const std::vector<int64_t>& new_sizes) {
   update_size_metadata(new_sizes);
-  view_->discard_and_reallocate(
+  storage_.discard_and_reallocate(
       calc_gpu_sizes(new_sizes, memory_layout_, storage_type()),
       memory_layout_,
       dtype_);
@@ -396,30 +285,6 @@ void vTensor::reallocate(const std::vector<int64_t>& new_sizes) {
 
 void vTensor::virtual_resize(const std::vector<int64_t>& new_sizes) {
   update_size_metadata(new_sizes);
-  if (storage_type() == api::kBuffer) {
-    if (gpu_nbytes() > view_->buffer_.mem_size()) {
-      VK_THROW(
-          "Cannot virtual_resize a vTensor with sizes that require a larger "
-          "buffer! reallocate() should be used instead.");
-    }
-  } else {
-    bool valid_resize = true;
-    if (virtual_extents_.data[0] > view_->extents_.data[0]) {
-      valid_resize = false;
-    }
-    if (virtual_extents_.data[1] > view_->extents_.data[1]) {
-      valid_resize = false;
-    }
-    if (virtual_extents_.data[2] > view_->extents_.data[2]) {
-      valid_resize = false;
-    }
-
-    if (!valid_resize) {
-      VK_THROW(
-          "Cannot virtual_resize a vTensor with sizes that require a larger "
-          "image texture! reallocate() should be used instead.");
-    }
-  }
 }
 
 //
@@ -442,7 +307,7 @@ api::VulkanImage allocate_image(
   };
 
   VkImageType image_type = VK_IMAGE_TYPE_3D;
-  VkImageViewType image_view_type = VK_IMAGE_VIEW_TYPE_3D;
+  VkImageViewType image_view_type;
 
   switch (storage_type) {
     case api::kTexture3D:
@@ -584,39 +449,6 @@ void vTensorStorage::transition(
   last_access_.access = cur_access;
 }
 
-void add_buffer_barrier(
-    api::PipelineBarrier& pipeline_barrier,
-    const api::VulkanBuffer& buffer,
-    const api::PipelineStageFlags prev_stage,
-    const api::MemoryAccessFlags prev_access,
-    const api::PipelineStageFlags cur_stage,
-    const api::MemoryAccessFlags cur_access) {
-  // Check for RAW
-  const bool read_requested = (cur_access & api::MemoryAccessType::READ) != 0;
-  const bool prev_written = (prev_access & api::MemoryAccessType::WRITE) != 0;
-
-  const bool is_RAW = read_requested && prev_written;
-
-  if (is_RAW) {
-    VkPipelineStageFlags src_stage = api::vk_stage(prev_stage);
-    if (0u == src_stage) {
-      src_stage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
-    }
-    VkPipelineStageFlags dst_stage = api::vk_stage(cur_stage);
-    if (0u == dst_stage) {
-      dst_stage = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
-    }
-
-    pipeline_barrier.stage.src |= src_stage;
-    pipeline_barrier.stage.dst |= dst_stage;
-
-    pipeline_barrier.buffers.emplace_back(
-        api::vk_access(prev_stage, prev_access),
-        api::vk_access(cur_stage, cur_access),
-        buffer);
-  }
-}
-
 void vTensorStorage::discard_and_reallocate(
     const std::vector<int64_t>& gpu_sizes,
     const api::GPUMemoryLayout gpu_memory_layout,
diff --git a/backends/vulkan/runtime/api/Tensor.h b/backends/vulkan/runtime/api/Tensor.h
index 110e94ab943..ba9c99c4bf0 100644
--- a/backends/vulkan/runtime/api/Tensor.h
+++ b/backends/vulkan/runtime/api/Tensor.h
@@ -42,11 +42,11 @@ class vTensorStorage final {
       const api::ScalarType dtype,
       const bool allocate_memory = true);
 
-  vTensorStorage(const vTensorStorage&) = delete;
-  vTensorStorage& operator=(const vTensorStorage&) = delete;
+  vTensorStorage(const vTensorStorage& other) = delete;
+  vTensorStorage& operator=(const vTensorStorage& other) = delete;
 
-  vTensorStorage(vTensorStorage&&) = default;
-  vTensorStorage operator=(vTensorStorage&&) = delete;
+  vTensorStorage(vTensorStorage&& other) = default;
+  vTensorStorage& operator=(vTensorStorage&& other) = default;
 
   ~vTensorStorage();
 
@@ -95,11 +95,7 @@ class vTensorStorage final {
 
 class vTensor final {
  public:
-  // Do not allow empty vTensor construction
-  vTensor() = default;
-
-  // Default constructor
-  vTensor(
+  explicit vTensor(
      api::Context* context,
      const std::vector<int64_t>& sizes,
      const api::ScalarType dtype,
@@ -107,47 +103,18 @@ class vTensor final {
       const api::GPUMemoryLayout memory_layout = api::kChannelsPacked,
       const bool allocate_memory = true);
 
-  // Default constructor for quantized vTensor
-  vTensor(
-      api::Context* const context,
-      const std::vector<int64_t>& sizes,
-      double q_scale,
-      int64_t q_zero_point,
-      const api::ScalarType dtype,
-      const api::StorageType storage_type = api::kTexture3D,
-      const api::GPUMemoryLayout memory_layout = api::kChannelsPacked);
-
-  // Copy Constructor and Assignment; Ideally copying would be disabled
-  // (see the reasoning for move assignment below) but it is required for
-  // compatibility with OpaqueTensorImpl
-  vTensor(const vTensor& other) = default;
-  vTensor& operator=(const vTensor& other) = default;
+  vTensor(const vTensor& other) = delete;
+  vTensor& operator=(const vTensor& other) = delete;
 
-  // Move Constructor and assignment
   vTensor(vTensor&& other) = default;
   vTensor& operator=(vTensor&& other) = default;
 
  private:
-  // Tensor Options
   api::ScalarType dtype_;
-
-  // GPU specific memory layout qualifier
   api::GPUMemoryLayout memory_layout_;
 
-  // Sizes and Strides
   std::vector<int64_t> sizes_;
-  std::vector<int64_t> strides_;
-
-  // Storage Dimensions. When stored on the GPU, one dimension will be aligned
-  // to the next multiple of 4 in order to take advantage of vec4 data types.
   std::vector<int64_t> gpu_sizes_;
-  std::vector<int64_t> gpu_strides_;
-
-  // The extents that correspond to the tensor's size metadata. Note that this
-  // may not be the same as the extents of the underlying image texture because
-  // vTensor can be virtually resized via virtual_resize() which will cause it
-  // to be interpreted as a tensor with a different size.
-  api::utils::uvec3 virtual_extents_;
 
   // A Vulkan uniform buffer containing the tensor sizes in WHCN that can be
   // passed into a shader.
@@ -163,45 +130,20 @@ class vTensor final {
   // image texture that can be passed into a shader.
   std::shared_ptr<api::UniformParamsBuffer> extents_uniform_;
 
-  // Quantization params
-  bool is_quantized_{false};
-  double q_scale_{1.0f};
-  int64_t q_zero_point_{0u};
-
-  // Even at the cost of a heap allocation plus the resulting negative impact
-  // on cache locality due to the subsequent pointer chasing, it is still
-  // critical to share the view across vTensor implementations to minimize
-  // programmer errors. Ideally this class should have been only made movable,
-  // and non-copyable - something we cannot do unfortunately due to the inner
-  // workings of at::TensorImpl requiring copy semantics in
-  // at::TensorImpl::release_resources() to function as expected. Now that this
-  // class is made copyable though, a new door to a whole new class of bugs is
-  // opened, in that there now is a chance of two [shallow] copies, have their
-  // StorageState objects go out of sync as a result of an operation being
-  // performed on one shallow copy that is not reflected in the other.
-  // Technically, if the programmer is very careful, it is possible to avoid
-  // this trap and not pay the cost of indirection, but the resulting bugs of
-  // missing memory barriers will be so frustrating to hunt down for those
-  // unfamiliar with the internal mechanics of this class, that I decided to
-  // take the performance penalty of this extra layer of indirection in favor
-  // of making this class easier to use.
-  std::shared_ptr<vTensorStorage> view_;
+  vTensorStorage storage_;
 
  public:
   /*
    Texture Access
   */
 
-  inline api::StorageType storage_type() const {
-    return view_->storage_type_;
-  }
-
   inline api::VulkanImage& image() const& {
-    return view_->image_;
+    return storage_.image_;
   }
 
-  api::VulkanImage& image(api::PipelineBarrier&, const api::PipelineStageFlags)
-      const&;
+  api::VulkanImage& image(
+      api::PipelineBarrier&,
+      const api::PipelineStageFlags) &;
 
   api::VulkanImage& image(
       api::PipelineBarrier&,
@@ -209,12 +151,12 @@ class vTensor final {
       const api::MemoryAccessFlags) &;
 
   inline api::VulkanBuffer& buffer() const& {
-    return view_->buffer_;
+    return storage_.buffer_;
   }
 
   api::VulkanBuffer& buffer(
       api::PipelineBarrier&,
-      const api::PipelineStageFlags) const&;
+      const api::PipelineStageFlags) &;
 
   api::VulkanBuffer& buffer(
       api::PipelineBarrier&,
@@ -225,8 +167,12 @@ class vTensor final {
     Metadata
   */
 
+  inline api::StorageType storage_type() const {
+    return storage_.storage_type_;
+  }
+
   inline const api::utils::uvec3& extents() const {
-    return view_->extents_;
+    return storage_.extents_;
   }
 
   /*
@@ -236,20 +182,12 @@ class vTensor final {
     return dtype_;
   }
 
-  /*
-   * Get an `api::ScalarType` that corresponds to the image format of the
-   * texture
-   */
-  inline api::ScalarType texture_dtype() const {
-    return api::element_scalartype(view_->texture_format());
-  }
-
   inline api::GPUMemoryLayout gpu_memory_layout() const {
     return memory_layout_;
   }
 
-  inline uint32_t gpu_memory_layout_as_uint() const {
-    return static_cast<uint32_t>(memory_layout_);
+  inline int32_t gpu_memory_layout_int() const {
+    return static_cast<int32_t>(memory_layout_);
   }
 
   inline const std::vector<int64_t>& sizes() const {
@@ -264,22 +202,6 @@ class vTensor final {
     return sizes_.size();
   }
 
-  inline const std::vector<int64_t>& strides() const {
-    return strides_;
-  }
-
-  inline const std::vector<int64_t>& gpu_sizes() const {
-    return gpu_sizes_;
-  }
-
-  inline const std::vector<int64_t>& gpu_strides() const {
-    return gpu_strides_;
-  }
-
-  inline const api::utils::uvec3& virtual_extents() const {
-    return virtual_extents_;
-  }
-
   /*
    * Get a uniform buffer object containing the tensor sizes to use in a compute
   * shader. Note that the UBO will be created the first time this function is
@@ -301,38 +223,6 @@ class vTensor final {
    */
   std::shared_ptr<api::UniformParamsBuffer> extents_ubo();
 
-  inline void set_is_quantized() {
-    is_quantized_ = true;
-  }
-
-  inline bool is_quantized() const {
-    return is_quantized_;
-  }
-
-  inline void set_scale(const double q_scale) {
-    q_scale_ = q_scale;
-  }
-
-  inline double get_scale() const {
-    return q_scale_;
-  }
-
-  inline float get_scale_float() const {
-    return api::utils::safe_downcast<float>(q_scale_);
-  }
-
-  inline void set_zero_point(const int64_t q_zero_point) {
-    q_zero_point_ = q_zero_point;
-  }
-
-  inline int64_t get_zero_point() const {
-    return q_zero_point_;
-  }
-
-  inline int32_t get_zero_point_int32() const {
-    return api::utils::safe_downcast<int32_t>(q_zero_point_);
-  }
-
   inline size_t numel() const {
     return api::utils::multiply_integers(sizes());
   }
@@ -349,7 +239,7 @@ class vTensor final {
   }
 
   /*
-   * Return nbytes but bnased on gpu_sizes_ instead of sizes_
+   * Return nbytes but based on gpu_sizes_ instead of sizes_
   */
   inline VkDeviceSize gpu_nbytes() const {
     return api::element_size(dtype()) * gpu_numel();
@@ -392,12 +282,4 @@ class vTensor final {
   void virtual_resize(const std::vector<int64_t>& new_sizes);
 };
 
-void add_buffer_barrier(
-    api::PipelineBarrier&,
-    const api::VulkanBuffer&,
-    const api::PipelineStageFlags,
-    const api::MemoryAccessFlags,
-    const api::PipelineStageFlags,
-    const api::MemoryAccessFlags);
-
 } // namespace vkcompute
diff --git a/backends/vulkan/runtime/graph/ops/impl/Permute.cpp b/backends/vulkan/runtime/graph/ops/impl/Permute.cpp
index 2e295845c13..ce2ca463871 100644
--- a/backends/vulkan/runtime/graph/ops/impl/Permute.cpp
+++ b/backends/vulkan/runtime/graph/ops/impl/Permute.cpp
@@ -79,7 +79,7 @@ void add_permute_node(
       {out_c_aligned, in_c_aligned},
   };
 
-  api::utils::uvec3 global_size = t_out->virtual_extents();
+  api::utils::uvec3 global_size = t_out->extents();
   api::utils::uvec3 local_size = adaptive_work_group_size(global_size);
 
   graph.execute_nodes().emplace_back(new ExecuteNode(
diff --git a/backends/vulkan/runtime/graph/ops/impl/Select.cpp b/backends/vulkan/runtime/graph/ops/impl/Select.cpp
index e0412450ed6..1db7ba82b65 100644
--- a/backends/vulkan/runtime/graph/ops/impl/Select.cpp
+++ b/backends/vulkan/runtime/graph/ops/impl/Select.cpp
@@ -102,7 +102,7 @@ void add_select_int_node(
   kernel_name.reserve(kShaderNameReserve);
   add_dtype_suffix(kernel_name, *t_out);
 
-  api::utils::uvec3 global_size = t_out->virtual_extents();
+  api::utils::uvec3 global_size = t_out->extents();
   api::utils::uvec3 local_size = adaptive_work_group_size(global_size);
 
   // TODO: add resizing to support dynamic shapes.
diff --git a/backends/vulkan/runtime/graph/ops/utils/StagingUtils.cpp b/backends/vulkan/runtime/graph/ops/utils/StagingUtils.cpp
index bc02595a6fd..e05632c2afc 100644
--- a/backends/vulkan/runtime/graph/ops/utils/StagingUtils.cpp
+++ b/backends/vulkan/runtime/graph/ops/utils/StagingUtils.cpp
@@ -96,10 +96,6 @@ void set_staging_zeros(api::StorageBuffer& staging, const size_t nbytes) {
 }
 
 api::ShaderInfo get_nchw_to_image_shader(const vTensor& v_dst) {
-  if (v_dst.is_quantized()) {
-    VK_THROW("Quantized Tensors are currently not supported!");
-  }
-
   std::string kernel_name;
   kernel_name.reserve(kShaderNameReserve);
 
@@ -121,10 +117,6 @@ api::ShaderInfo get_nchw_to_image_shader(const vTensor& v_dst) {
 }
 
 api::ShaderInfo get_image_to_nchw_shader(const vTensor& v_src) {
-  if (v_src.is_quantized()) {
-    VK_THROW("Quantized Tensors are currently not supported!");
-  }
-
   std::string kernel_name;
   kernel_name.reserve(kShaderNameReserve);
 
diff --git a/backends/vulkan/test/utils/test_utils.cpp b/backends/vulkan/test/utils/test_utils.cpp
index 21198a9c8e3..cbba7c45925 100644
--- a/backends/vulkan/test/utils/test_utils.cpp
+++ b/backends/vulkan/test/utils/test_utils.cpp
@@ -26,8 +26,8 @@ void record_nchw_to_image_op(
   context->submit_compute_job(
       get_nchw_to_image_shader(v_dst),
       pipeline_barrier,
-      v_dst.virtual_extents(),
-      adaptive_work_group_size(v_dst.virtual_extents()),
+      v_dst.extents(),
+      adaptive_work_group_size(v_dst.extents()),
       specialization_constants,
       VK_NULL_HANDLE,
       v_dst.image(
@@ -49,8 +49,8 @@ void record_image_to_nchw_op(
   context->submit_compute_job(
       get_image_to_nchw_shader(v_src),
       pipeline_barrier,
-      v_src.virtual_extents(),
-      adaptive_work_group_size(v_src.virtual_extents()),
+      v_src.extents(),
+      adaptive_work_group_size(v_src.extents()),
       specialization_constants,
       VK_NULL_HANDLE,
       v_src.image(pipeline_barrier, api::PipelineStage::COMPUTE),
@@ -87,8 +87,8 @@ void record_conv2d_prepack_weights_op(
   context->submit_compute_job(
       shader,
       pipeline_barrier,
-      v_dst.virtual_extents(),
-      adaptive_work_group_size(v_dst.virtual_extents()),
+      v_dst.extents(),
+      adaptive_work_group_size(v_dst.extents()),
       specialization_constants,
       VK_NULL_HANDLE,
       v_dst.image(
@@ -115,8 +115,8 @@ void record_binary_op(
   context->submit_compute_job(
       VK_KERNEL_FROM_STR(kernel_name),
       pipeline_barrier,
-      v_dst.virtual_extents(),
-      adaptive_work_group_size(v_dst.virtual_extents()),
+      v_dst.extents(),
+      adaptive_work_group_size(v_dst.extents()),
       specialization_constants,
       VK_NULL_HANDLE,
       v_dst.image(
diff --git a/backends/vulkan/test/vulkan_compute_api_test.cpp b/backends/vulkan/test/vulkan_compute_api_test.cpp
index 5dc53ca4acb..0d89d618166 100644
--- a/backends/vulkan/test/vulkan_compute_api_test.cpp
+++ b/backends/vulkan/test/vulkan_compute_api_test.cpp
@@ -915,7 +915,7 @@ void run_from_gpu_test(
   api::context()->submit_compute_job(
       VK_KERNEL_FROM_STR(kernel_name),
      pipeline_barrier,
-      vten.virtual_extents(),
+      vten.extents(),
      {4, 4, 4},
      specialization_constants,
      VK_NULL_HANDLE,
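
Reviewer note, appended after the patch rather than interleaved with it: the snippet below is a minimal, hypothetical usage sketch of the vTensor API as it looks after this refactor, not code taken from the change. It assumes the vkcompute api headers used by the tests and an initialized api::context(); the include path, function name, and sizes are illustrative only. It exercises the three behavioral points of the diff: vTensor now owns its vTensorStorage by value and is move-only, work-group sizing reads extents() now that virtual_extents() is gone, and the barrier-taking image()/buffer() accessors require a non-const tensor because they transition the owned storage.

// Sketch only; the include path below is an assumption about this repo's layout.
#include <executorch/backends/vulkan/runtime/api/api.h>

#include <utility>
#include <vector>

using namespace vkcompute;

void example_dispatch_setup() {
  // Texture-backed float tensor; storage is held by value, no shared_ptr view.
  std::vector<int64_t> sizes = {1, 4, 64, 64};
  vTensor t(api::context(), sizes, api::kFloat);

  // Copying is deleted by this change; transferring ownership requires a move.
  vTensor t2 = std::move(t);

  // Global work-group size now comes from the actual texture extents.
  const api::utils::uvec3 global_wg = t2.extents();

  // The barrier-taking accessor is non-const: it transitions the owned storage.
  api::PipelineBarrier pipeline_barrier{};
  api::VulkanImage& img = t2.image(
      pipeline_barrier, api::PipelineStage::COMPUTE, api::MemoryAccessType::WRITE);

  (void)global_wg;
  (void)img;
}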