pytorch
diff --git a/‎README.md‎
Lines changed: 1 addition & 1 deletion b/‎README.md‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎backends/vulkan/op_registry.py‎
Lines changed: 12 additions & 0 deletions b/‎backends/vulkan/op_registry.py‎
Lines changed: 12 additions & 0 deletions
diff --git a/‎backends/vulkan/runtime/api/containers/Tensor.cpp‎
Lines changed: 60 additions & 0 deletions b/‎backends/vulkan/runtime/api/containers/Tensor.cpp‎
Lines changed: 60 additions & 0 deletions
diff --git a/‎backends/vulkan/runtime/api/containers/Tensor.h‎
Lines changed: 27 additions & 0 deletions b/‎backends/vulkan/runtime/api/containers/Tensor.h‎
Lines changed: 27 additions & 0 deletions
diff --git a/‎backends/vulkan/runtime/graph/ComputeGraph.h‎
Lines changed: 12 additions & 0 deletions b/‎backends/vulkan/runtime/graph/ComputeGraph.h‎
Lines changed: 12 additions & 0 deletions
diff --git a/‎backends/vulkan/runtime/graph/GraphConfig.cpp‎
Lines changed: 1 addition & 0 deletions b/‎backends/vulkan/runtime/graph/GraphConfig.cpp‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎backends/vulkan/runtime/graph/GraphConfig.h‎
Lines changed: 3 additions & 0 deletions b/‎backends/vulkan/runtime/graph/GraphConfig.h‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎backends/vulkan/runtime/graph/ops/ExecuteNode.cpp‎
Lines changed: 3 additions & 2 deletions b/‎backends/vulkan/runtime/graph/ops/ExecuteNode.cpp‎
Lines changed: 3 additions & 2 deletions
diff --git a/‎backends/vulkan/runtime/graph/ops/glsl/binary_op_defs.glslh‎
Lines changed: 56 additions & 0 deletions b/‎backends/vulkan/runtime/graph/ops/glsl/binary_op_defs.glslh‎
Lines changed: 56 additions & 0 deletions
diff --git a/‎backends/vulkan/runtime/graph/ops/glsl/binary_scalar_buffer.glsl‎
Lines changed: 45 additions & 0 deletions b/‎backends/vulkan/runtime/graph/ops/glsl/binary_scalar_buffer.glsl‎
Lines changed: 45 additions & 0 deletions
@@ -202,7 +202,7 @@ ExecuTorch powers on-device AI at scale across Meta's family of apps, VR/AR devi
 
 **LLMs:** [Llama 3.2/3.1/3](examples/models/llama/README.md), [Qwen 3](examples/models/qwen3/README.md), [Phi-4-mini](examples/models/phi_4_mini/README.md), [LiquidAI LFM2](examples/models/lfm2/README.md)
 
-**Multimodal:** [Llava](examples/models/llava/README.md) (vision-language), [Voxtral](examples/models/voxtral/README.md) (audio-language)
+**Multimodal:** [Llava](examples/models/llava/README.md) (vision-language), [Voxtral](examples/models/voxtral/README.md) (audio-language), [Gemma](examples/models/gemma3) (vision-language)
 
 **Vision/Speech:** [MobileNetV2](https://github.com/meta-pytorch/executorch-examples/tree/main/mv2), [DeepLabV3](https://github.com/meta-pytorch/executorch-examples/tree/main/dl3), [Whisper](https://github.com/meta-pytorch/executorch-examples/tree/main/whisper/android/WhisperApp)
 
 
@@ -228,6 +228,18 @@ def register_binary_op():
     )
 
 
+@update_features(
+    [
+        exir_ops.edge.aten.pow.Tensor_Scalar,
+    ]
+)
+def register_binary_scalar_op():
+    return OpFeatures(
+        inputs_storage=utils.ANY_STORAGE,
+        supports_resize=True,
+    )
+
+
 @update_features(
     [
         exir_ops.edge.aten.abs.default,
 
@@ -836,6 +836,50 @@ void vTensor::BufferMetadata::update(
   numel = utils::safe_downcast<uint32_t>(src_numel);
 }
 
+vTensor::TextureMetadata::TextureMetadata(
+    const std::vector<int64_t>& src_sizes,
+    const TextureLimits& src_logical_limits,
+    const std::vector<int64_t>& src_axis_map,
+    const int32_t src_packed_dim) {
+  update(src_sizes, src_logical_limits, src_axis_map, src_packed_dim); // update() assigns every field, so construction and refresh share one code path
+}
+
+// Overwrites every field of this TextureMetadata from the given tensor
+// properties.
+//
+// src_sizes          - tensor sizes; stored flipped and unsqueezed to 4 dims
+// src_logical_limits - texture limits; three components are copied
+// src_axis_map       - axis map; at most the first 4 entries are used
+// src_packed_dim     - packed dimension index, stored as-is
+void vTensor::TextureMetadata::update(
+    const std::vector<int64_t>& src_sizes,
+    const TextureLimits& src_logical_limits,
+    const std::vector<int64_t>& src_axis_map,
+    const int32_t src_packed_dim) {
+  // Convert sizes to flipped and unsqueezed format (fixed to 4 dimensions for
+  // texture)
+  const std::vector<int32_t> fu_sizes =
+      flip_and_unsqueeze<int32_t>(src_sizes, kTensorSizes, 0, 4);
+
+  // Copy sizes (exactly 4 elements; at() guards against a short vector)
+  for (size_t i = 0; i < 4; ++i) {
+    sizes[i] = fu_sizes.at(i);
+  }
+
+  // Copy logical limits (3 elements); the 4th slot is fixed to 1
+  logical_limits[0] =
+      utils::safe_downcast<int32_t>(src_logical_limits.limits[0]);
+  logical_limits[1] =
+      utils::safe_downcast<int32_t>(src_logical_limits.limits[1]);
+  logical_limits[2] =
+      utils::safe_downcast<int32_t>(src_logical_limits.limits[2]);
+  logical_limits[3] = 1;
+
+  // Copy axis map (up to 4 elements), padding with zeros if it is shorter.
+  // size_t indices avoid comparing a signed counter against size().
+  const size_t axis_map_len = src_axis_map.size();
+  for (size_t i = 0; i < 4; ++i) {
+    axis_map[i] = (i < axis_map_len)
+        ? utils::safe_downcast<int32_t>(src_axis_map.at(i))
+        : 0;
+  }
+
+  packed_dim = src_packed_dim;
+}
+
 vkapi::VulkanImage& vTensor::image(
     vkapi::PipelineBarrier& pipeline_barrier,
     const vkapi::PipelineStageFlags stage) & {
@@ -948,6 +992,16 @@ const vkapi::BufferBindInfo vTensor::buffer_meta_ubo() {
   return vkapi::BufferBindInfo(buffer_meta_.buffer(), 0, ubo_nbytes);
 }
 
+// Returns bind info covering the whole TextureMetadata UBO. The backing
+// buffer is created on the first call and reused afterwards.
+const vkapi::BufferBindInfo vTensor::texture_meta_ubo() {
+  if (!texture_meta_.buffer()) {
+    const TextureLimits current_limits(logical_limits());
+    const TextureMetadata initial_meta(
+        sizes_, current_limits, axis_map_, packed_dim_);
+    texture_meta_ = ParamsBuffer(storage_->context_, initial_meta);
+  }
+  const size_t meta_nbytes = sizeof(TextureMetadata);
+  return vkapi::BufferBindInfo(texture_meta_.buffer(), 0, meta_nbytes);
+}
+
 VkMemoryRequirements vTensor::get_memory_requirements() const {
   switch (storage_type()) {
     case utils::kBuffer:
@@ -1031,6 +1085,12 @@ void vTensor::update_metadata() {
     BufferMetadata data(sizes_, dim_order_, strides_, numel_);
     buffer_meta_.update(data);
   }
+
+  if (texture_meta_.buffer()) {
+    TextureMetadata data(
+        sizes_, uniform_data_->logical_limits, axis_map_, packed_dim_);
+    texture_meta_.update(data);
+  }
 }
 
 void vTensor::check_sizes(const std::vector<int64_t>& sizes) const {
 
@@ -285,6 +285,25 @@ class vTensor final {
         size_t numel);
   };
 
+  struct TextureMetadata { // texture tensor metadata handed to shaders via texture_meta_ubo()
+    int32_t sizes[4]; // tensor sizes, flipped and unsqueezed to 4 dims
+    int32_t logical_limits[4]; // [0..2] copied from TextureLimits; [3] is always set to 1
+    int32_t axis_map[4]; // first 4 axis map entries, zero-padded if fewer are given
+    int32_t packed_dim; // stored exactly as passed in
+
+    TextureMetadata(
+        const std::vector<int64_t>& sizes,
+        const TextureLimits& logical_limits,
+        const std::vector<int64_t>& axis_map,
+        const int32_t packed_dim); // populates all fields by calling update()
+
+    void update(
+        const std::vector<int64_t>& sizes,
+        const TextureLimits& logical_limits,
+        const std::vector<int64_t>& axis_map,
+        const int32_t packed_dim); // overwrites every field from the given values
+  };
+
  private:
   /*
    * "Core" tensor metadata. They are the minimum amount of information required
@@ -360,6 +379,12 @@ class vTensor final {
    */
   ParamsBuffer buffer_meta_;
 
+  /*
+   * Used to store data for TextureMetadata to pass to shaders as
+   * texture_meta_ubo
+   */
+  ParamsBuffer texture_meta_;
+
   uint32_t uniforms_size_ = 0u;
   uint32_t sizes_uniform_offset_ = kUniformOffsetUnset;
   uint32_t dim_order_uniform_offset_ = kUniformOffsetUnset;
@@ -587,6 +612,8 @@ class vTensor final {
 
   const vkapi::BufferBindInfo buffer_meta_ubo();
 
+  const vkapi::BufferBindInfo texture_meta_ubo();
+
  public:
   inline size_t staging_buffer_numel() const {
     return storage_->buffer_len();
 
@@ -449,6 +449,18 @@ class ComputeGraph final {
     return values_.at(idx).toTensor().buffer_meta_ubo();
   }
 
+  inline vkapi::BufferBindInfo texture_meta_ubo(const ValueRef idx) {
+    // Forward to the tensor stored at idx, which holds the metadata UBO.
+    auto&& tensor = values_.at(idx).toTensor();
+    return tensor.texture_meta_ubo();
+  }
+
+  // Picks the metadata UBO matching the storage type of the tensor at idx:
+  // BufferMetadata for buffer storage, TextureMetadata for anything else.
+  inline vkapi::BufferBindInfo meta_ubo(const ValueRef idx) {
+    if (!is_buffer_storage(idx)) {
+      return texture_meta_ubo(idx);
+    }
+    return buffer_meta_ubo(idx);
+  }
+
   inline vkapi::BufferBindInfo strides_ubo(const ValueRef idx) {
     return values_.at(idx).toTensor().strides_ubo();
   }
 
@@ -65,6 +65,7 @@ GraphConfig::GraphConfig() {
   local_wg_size_override = {};
 
   expect_dynamic_shapes = false;
+  force_resize = false;
 
   external_adapter = nullptr;
 }
 
@@ -35,6 +35,9 @@ struct GraphConfig final {
 
   // Whether or not the ComputeGraph should expect input shapes to be dynamic
   bool expect_dynamic_shapes;
+  // Used for testing/debugging only. Forces ExecuteNode to trigger the resize
+  // function even if none of the inputs have been updated.
+  bool force_resize = false;
 
   // Execution properties that determine specifics re: how command buffer
   // submission is handled, etc. 0 means this field is not set.
 
@@ -21,9 +21,10 @@ ExecuteNode::ExecuteNode(
       name_(name) {}
 
 bool ExecuteNode::trigger_resize(ComputeGraph* graph) {
-  const bool any_arg_updated = was_any_arg_updated(graph);
-  if (resize_fn_ && any_arg_updated) {
+  bool any_arg_updated = was_any_arg_updated(graph);
+  if (resize_fn_ && (any_arg_updated || graph->graphconfig().force_resize)) {
     resize_fn_(graph, args_, resize_args_);
+    any_arg_updated = true; // resize_fn_ ran (possibly forced), so report an update
   }
   return any_arg_updated; // true if an arg was updated or a resize was forced
 }
 
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#ifndef BINARY_OP_DEFS_GLSLH
+#define BINARY_OP_DEFS_GLSLH
+
+//
+// Power operation that handles negative and zero bases
+//
+// In GLSL, pow(x, y) is undefined for x < 0. This function provides
+// a safe implementation that:
+// - Handles x == 0 (returns 1 for y == 0, returns 0 for every other y)
+// - Handles x < 0 by using absolute value and preserving sign for odd integer exponents
+// - Uses standard pow() for x > 0
+//
+
+// Scalar overload
+//
+// Returns x raised to the power y without passing a negative base to pow().
+// A zero base yields 1 when y == 0 and 0 for any other y. A negative base
+// yields -|x|^y when y is (within 1e-5 of) an odd integer, and |x|^y
+// otherwise.
+T power_of(T x, T y) {
+  if (x == 0.0) {
+    // Handle 0^y: 0^0 = 1, any other exponent yields 0
+    return (y == 0.0) ? T(1.0) : T(0.0);
+  }
+
+  // Use absolute value to avoid undefined behavior
+  float result = pow(abs(x), y);
+
+  // For negative bases with odd integer exponents, preserve the negative sign
+  if (x < 0.0) {
+    float int_y = round(y);
+    // GLSL leaves `%` undefined when an operand is negative, so test the
+    // parity of |int_y|. Otherwise a negative odd exponent (e.g. y = -3) may
+    // not be recognized as odd and the sign would be dropped.
+    if (abs(y - int_y) < 1e-5 && abs(int(int_y)) % 2 == 1) {
+      result = -result;
+    }
+  }
+
+  return T(result);
+}
+
+#ifdef VEC4_T
+
+// Vector overload
+// Applies the scalar power_of to each of the four components independently.
+VEC4_T power_of(VEC4_T x, VEC4_T y) {
+  return VEC4_T(
+      power_of(x[0], y[0]),
+      power_of(x[1], y[1]),
+      power_of(x[2], y[2]),
+      power_of(x[3], y[3]));
+}
+
+#endif // VEC4_T
+
+#endif // BINARY_OP_DEFS_GLSLH
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#version 450 core
+
+#define PRECISION ${PRECISION}
+
+#define NAME ${VARIANT_NAME}
+
+#define T ${buffer_scalar_type(DTYPE)}
+
+#define op(X, Y) ${OPERATOR}
+
+${define_active_storage_type(STORAGE)}
+${define_required_extensions(DTYPE)}
+
+layout(std430) buffer;
+
+#include "indexing.glslh"
+
+${layout_declare_tensor(B, "w", "t_out", DTYPE, STORAGE)}
+${layout_declare_tensor(B, "r", "t_in", DTYPE, STORAGE)}
+
+${layout_declare_ubo(B, "BufferMetadata", "outp")}
+${layout_declare_ubo(B, "BufferMetadata", "inp")}
+
+${layout_declare_ubo(B, "float", "scalar_value")}
+
+layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
+
+#include "binary_op_defs.glslh"
+
+// One invocation per output buffer element: applies op() to the matching
+// input element and the scalar operand.
+void main() {
+  const uint elem_idx = gl_GlobalInvocationID.x;
+  // Invocations past the end of the output buffer do nothing.
+  if (!out_of_bounds(elem_idx, outp)) {
+    t_out[elem_idx] = T(op(t_in[elem_idx], T(scalar_value)));
+  }
+}
Original file line number	Diff line number	Diff line change
`@@ -65,6 +65,7 @@ GraphConfig::GraphConfig() {`
`65`	`65`	`local_wg_size_override = {};`
`66`	`66`
`67`	`67`	`expect_dynamic_shapes = false;`
	`68`	`+ force_resize = false;`
`68`	`69`
`69`	`70`	`external_adapter = nullptr;`
`70`	`71`	`}`
Original file line number	Diff line number	Diff line change
`@@ -21,9 +21,10 @@ ExecuteNode::ExecuteNode(`
`21`	`21`	`name_(name) {}`
`22`	`22`
`23`	`23`	`bool ExecuteNode::trigger_resize(ComputeGraph* graph) {`
`24`		`- const bool any_arg_updated = was_any_arg_updated(graph);`
`25`		`- if (resize_fn_ && any_arg_updated) {`
	`24`	`+ bool any_arg_updated = was_any_arg_updated(graph);`
	`25`	`+ if (resize_fn_ && (any_arg_updated \|\| graph->graphconfig().force_resize)) {`
`26`	`26`	`resize_fn_(graph, args_, resize_args_);`
	`27`	`+ any_arg_updated = true;`
`27`	`28`	`}`
`28`	`29`	`return any_arg_updated;`
`29`	`30`	`}`