Skip to content

Commit 654234c

Browse files
Merge branch 'main' into fix_typos_before_ga
2 parents b02f38b + 455639b commit 654234c

34 files changed

+833
-58
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -202,7 +202,7 @@ ExecuTorch powers on-device AI at scale across Meta's family of apps, VR/AR devi
202202

203203
**LLMs:** [Llama 3.2/3.1/3](examples/models/llama/README.md), [Qwen 3](examples/models/qwen3/README.md), [Phi-4-mini](examples/models/phi_4_mini/README.md), [LiquidAI LFM2](examples/models/lfm2/README.md)
204204

205-
**Multimodal:** [Llava](examples/models/llava/README.md) (vision-language), [Voxtral](examples/models/voxtral/README.md) (audio-language)
205+
**Multimodal:** [Llava](examples/models/llava/README.md) (vision-language), [Voxtral](examples/models/voxtral/README.md) (audio-language), [Gemma](examples/models/gemma3) (vision-language)
206206

207207
**Vision/Speech:** [MobileNetV2](https://github.com/meta-pytorch/executorch-examples/tree/main/mv2), [DeepLabV3](https://github.com/meta-pytorch/executorch-examples/tree/main/dl3), [Whisper](https://github.com/meta-pytorch/executorch-examples/tree/main/whisper/android/WhisperApp)
208208

backends/vulkan/op_registry.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -228,6 +228,18 @@ def register_binary_op():
228228
)
229229

230230

231+
@update_features([exir_ops.edge.aten.pow.Tensor_Scalar])
def register_binary_scalar_op():
    """Build the OpFeatures entry for exir_ops.edge.aten.pow.Tensor_Scalar.

    The entry accepts inputs in any storage type (utils.ANY_STORAGE) and
    declares that the op supports resizing.
    """
    features = OpFeatures(
        inputs_storage=utils.ANY_STORAGE,
        supports_resize=True,
    )
    return features
241+
242+
231243
@update_features(
232244
[
233245
exir_ops.edge.aten.abs.default,

backends/vulkan/runtime/api/containers/Tensor.cpp

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -836,6 +836,50 @@ void vTensor::BufferMetadata::update(
836836
numel = utils::safe_downcast<uint32_t>(src_numel);
837837
}
838838

839+
/*
 * Constructs the metadata by forwarding all four arguments, unchanged, to
 * update(), which is the single place where the members are populated.
 */
vTensor::TextureMetadata::TextureMetadata(
    const std::vector<int64_t>& src_sizes,
    const TextureLimits& src_logical_limits,
    const std::vector<int64_t>& src_axis_map,
    const int32_t src_packed_dim) {
  this->update(src_sizes, src_logical_limits, src_axis_map, src_packed_dim);
}
846+
847+
void vTensor::TextureMetadata::update(
848+
const std::vector<int64_t>& src_sizes,
849+
const TextureLimits& src_logical_limits,
850+
const std::vector<int64_t>& src_axis_map,
851+
const int32_t src_packed_dim) {
852+
// Convert sizes to flipped and unsqueezed format (fixed to 4 dimensions for
853+
// texture)
854+
std::vector<int32_t> fu_sizes =
855+
flip_and_unsqueeze<int32_t>(src_sizes, kTensorSizes, 0, 4);
856+
857+
// Copy sizes (up to 4 elements)
858+
for (int i = 0; i < 4; ++i) {
859+
sizes[i] = fu_sizes.at(i);
860+
}
861+
862+
// Copy logical limits (3 elements)
863+
logical_limits[0] =
864+
utils::safe_downcast<int32_t>(src_logical_limits.limits[0]);
865+
logical_limits[1] =
866+
utils::safe_downcast<int32_t>(src_logical_limits.limits[1]);
867+
logical_limits[2] =
868+
utils::safe_downcast<int32_t>(src_logical_limits.limits[2]);
869+
logical_limits[3] = 1u;
870+
871+
// Copy axis map (up to 4 elements)
872+
for (int i = 0; i < 4 && i < src_axis_map.size(); ++i) {
873+
axis_map[i] = utils::safe_downcast<int32_t>(src_axis_map.at(i));
874+
}
875+
// Pad with zeros if axis_map is smaller than 4
876+
for (int i = src_axis_map.size(); i < 4; ++i) {
877+
axis_map[i] = 0;
878+
}
879+
880+
packed_dim = src_packed_dim;
881+
}
882+
839883
vkapi::VulkanImage& vTensor::image(
840884
vkapi::PipelineBarrier& pipeline_barrier,
841885
const vkapi::PipelineStageFlags stage) & {
@@ -948,6 +992,16 @@ const vkapi::BufferBindInfo vTensor::buffer_meta_ubo() {
948992
return vkapi::BufferBindInfo(buffer_meta_.buffer(), 0, ubo_nbytes);
949993
}
950994

995+
/*
 * Returns bind info covering sizeof(TextureMetadata) bytes of texture_meta_.
 * If texture_meta_ has no buffer yet, it is first created from the tensor's
 * current sizes, logical limits, axis map and packed dim.
 */
const vkapi::BufferBindInfo vTensor::texture_meta_ubo() {
  if (!texture_meta_.buffer()) {
    TextureLimits tex_limits(logical_limits());
    TextureMetadata meta(sizes_, tex_limits, axis_map_, packed_dim_);
    texture_meta_ = ParamsBuffer(storage_->context_, meta);
  }

  size_t meta_nbytes = sizeof(TextureMetadata);
  return vkapi::BufferBindInfo(texture_meta_.buffer(), 0, meta_nbytes);
}
1004+
9511005
VkMemoryRequirements vTensor::get_memory_requirements() const {
9521006
switch (storage_type()) {
9531007
case utils::kBuffer:
@@ -1031,6 +1085,12 @@ void vTensor::update_metadata() {
10311085
BufferMetadata data(sizes_, dim_order_, strides_, numel_);
10321086
buffer_meta_.update(data);
10331087
}
1088+
1089+
if (texture_meta_.buffer()) {
1090+
TextureMetadata data(
1091+
sizes_, uniform_data_->logical_limits, axis_map_, packed_dim_);
1092+
texture_meta_.update(data);
1093+
}
10341094
}
10351095

10361096
void vTensor::check_sizes(const std::vector<int64_t>& sizes) const {

backends/vulkan/runtime/api/containers/Tensor.h

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -285,6 +285,25 @@ class vTensor final {
285285
size_t numel);
286286
};
287287

288+
struct TextureMetadata {
289+
int32_t sizes[4];
290+
int32_t logical_limits[4];
291+
int32_t axis_map[4];
292+
int32_t packed_dim;
293+
294+
TextureMetadata(
295+
const std::vector<int64_t>& sizes,
296+
const TextureLimits& logical_limits,
297+
const std::vector<int64_t>& axis_map,
298+
const int32_t packed_dim);
299+
300+
void update(
301+
const std::vector<int64_t>& sizes,
302+
const TextureLimits& logical_limits,
303+
const std::vector<int64_t>& axis_map,
304+
const int32_t packed_dim);
305+
};
306+
288307
private:
289308
/*
290309
* "Core" tensor metadata. They are the minimum amount of information required
@@ -360,6 +379,12 @@ class vTensor final {
360379
*/
361380
ParamsBuffer buffer_meta_;
362381

382+
/*
383+
* Used to store data for TextureMetadata to pass to shaders as
384+
* texture_meta_ubo
385+
*/
386+
ParamsBuffer texture_meta_;
387+
363388
uint32_t uniforms_size_ = 0u;
364389
uint32_t sizes_uniform_offset_ = kUniformOffsetUnset;
365390
uint32_t dim_order_uniform_offset_ = kUniformOffsetUnset;
@@ -587,6 +612,8 @@ class vTensor final {
587612

588613
const vkapi::BufferBindInfo buffer_meta_ubo();
589614

615+
const vkapi::BufferBindInfo texture_meta_ubo();
616+
590617
public:
591618
inline size_t staging_buffer_numel() const {
592619
return storage_->buffer_len();

backends/vulkan/runtime/graph/ComputeGraph.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -449,6 +449,18 @@ class ComputeGraph final {
449449
return values_.at(idx).toTensor().buffer_meta_ubo();
450450
}
451451

452+
// Returns the texture metadata UBO bind info of the tensor stored at `idx`.
inline vkapi::BufferBindInfo texture_meta_ubo(const ValueRef idx) {
  auto&& tensor = values_.at(idx).toTensor();
  return tensor.texture_meta_ubo();
}
455+
456+
// Returns the metadata UBO matching the storage of the value at `idx`:
// buffer_meta_ubo() for buffer storage, texture_meta_ubo() otherwise.
inline vkapi::BufferBindInfo meta_ubo(const ValueRef idx) {
  if (!is_buffer_storage(idx)) {
    return texture_meta_ubo(idx);
  }
  return buffer_meta_ubo(idx);
}
463+
452464
// Returns the strides UBO bind info of the tensor stored at `idx`.
inline vkapi::BufferBindInfo strides_ubo(const ValueRef idx) {
  auto&& tensor = values_.at(idx).toTensor();
  return tensor.strides_ubo();
}

backends/vulkan/runtime/graph/GraphConfig.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ GraphConfig::GraphConfig() {
6565
local_wg_size_override = {};
6666

6767
expect_dynamic_shapes = false;
68+
force_resize = false;
6869

6970
external_adapter = nullptr;
7071
}

backends/vulkan/runtime/graph/GraphConfig.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,9 @@ struct GraphConfig final {
3535

3636
// Whether or not the ComputeGraph should expect input shapes to be dynamic
3737
bool expect_dynamic_shapes;
38+
// Used for testing/debugging only. Forces ExecuteNode to trigger the resize
39+
// function even if none of the inputs have been updated.
40+
bool force_resize = false;
3841

3942
// Execution properties that determine specifics re: how command buffer
4043
// submission is handled, etc. 0 means this field is not set.

backends/vulkan/runtime/graph/ops/ExecuteNode.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,10 @@ ExecuteNode::ExecuteNode(
2121
name_(name) {}
2222

2323
/*
 * Runs this node's resize function when one is set and either an argument was
 * updated or the graph config has force_resize enabled.
 *
 * Returns true if any argument was updated, or if the resize function was
 * executed (including a forced execution); false otherwise.
 */
bool ExecuteNode::trigger_resize(ComputeGraph* graph) {
  bool any_arg_updated = was_any_arg_updated(graph);
  if (resize_fn_ && (any_arg_updated || graph->graphconfig().force_resize)) {
    resize_fn_(graph, args_, resize_args_);
    // A forced resize is reported to the caller the same way as an update.
    any_arg_updated = true;
  }
  return any_arg_updated;
}
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
#ifndef BINARY_OP_DEFS_GLSLH
10+
#define BINARY_OP_DEFS_GLSLH
11+
12+
//
13+
// Power operation that handles negative and zero bases
14+
//
15+
// In GLSL, pow(x, y) is undefined for x < 0. This function provides
16+
// a safe implementation that:
17+
// - Handles x == 0 (returns 1 for y == 0, returns 0 for any other y)
18+
// - Handles x < 0 by using absolute value and preserving sign for odd integer exponents
19+
// - Uses standard pow() for x > 0
20+
//
21+
22+
// Scalar overload
23+
// Computes x raised to the power y without ever passing a zero or negative
// base to pow().
//   x == 0 : returns 1 when y == 0, otherwise 0
//   x  > 0 : returns pow(x, y)
//   x  < 0 : returns pow(|x|, y), negated when y is (within 1e-5 of) an odd
//            integer
T power_of(T x, T y) {
  if (x == 0.0) {
    // Handle 0^y: 0^0 = 1, any other exponent yields 0
    return (y == 0.0) ? T(1.0) : T(0.0);
  }

  // Use absolute value to avoid undefined behavior
  float result = pow(abs(x), y);

  // For negative bases with odd integer exponents, preserve the negative sign
  if (x < 0.0) {
    float int_y = round(y);
    // Take abs() before %: the remainder of a negative operand is not a
    // reliable 1, so negative odd exponents would otherwise lose the sign.
    if (abs(y - int_y) < 1e-5 && abs(int(int_y)) % 2 == 1) {
      result = -result;
    }
  }

  return T(result);
}
42+
43+
#ifdef VEC4_T
44+
45+
// Vector overload
46+
// Applies the scalar power_of() to each of the four components of x and y.
VEC4_T power_of(VEC4_T x, VEC4_T y) {
  return VEC4_T(
      power_of(x[0], y[0]),
      power_of(x[1], y[1]),
      power_of(x[2], y[2]),
      power_of(x[3], y[3]));
}
53+
54+
#endif // VEC4_T
55+
56+
#endif // BINARY_OP_DEFS_GLSLH
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
#version 450 core
10+
11+
#define PRECISION ${PRECISION}
12+
13+
#define NAME ${VARIANT_NAME}
14+
15+
#define T ${buffer_scalar_type(DTYPE)}
16+
17+
#define op(X, Y) ${OPERATOR}
18+
19+
${define_active_storage_type(STORAGE)}
20+
${define_required_extensions(DTYPE)}
21+
22+
layout(std430) buffer;
23+
24+
#include "indexing.glslh"
25+
26+
${layout_declare_tensor(B, "w", "t_out", DTYPE, STORAGE)}
27+
${layout_declare_tensor(B, "r", "t_in", DTYPE, STORAGE)}
28+
29+
${layout_declare_ubo(B, "BufferMetadata", "outp")}
30+
${layout_declare_ubo(B, "BufferMetadata", "inp")}
31+
32+
${layout_declare_ubo(B, "float", "scalar_value")}
33+
34+
layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
35+
36+
#include "binary_op_defs.glslh"
37+
38+
// One invocation per output buffer element: applies op() to the matching
// input element and the scalar, skipping invocations that are out of bounds.
void main() {
  const uint elem_idx = gl_GlobalInvocationID.x;
  if (!out_of_bounds(elem_idx, outp)) {
    t_out[elem_idx] = T(op(t_in[elem_idx], T(scalar_value)));
  }
}

0 commit comments

Comments
 (0)