1. remove the inputs parameter from the ggml_can_fuse_subgraph signatures, as inputs are transient nodes

am17an · am17an · commit d85303640e5b · 2025-10-20T22:49:18.000+08:00
2. add a check for views: the view_src of an interior view node must be part of the subgraph
diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
@@ -2822,8 +2822,7 @@ static bool ggml_cuda_can_fuse(const struct ggml_cgraph * cgraph, int node_idx,
     std::initializer_list<enum ggml_op> topk_moe_ops_with_norm = ggml_cuda_topk_moe_ops(true);
 
     if (ops.size() == topk_moe_ops_with_norm.size() &&
-        ggml_can_fuse_subgraph(cgraph, node_idx, topk_moe_ops_with_norm, { node_idx },
-                               { node_idx + 3, node_idx + 8 })) {
+        ggml_can_fuse_subgraph(cgraph, node_idx, topk_moe_ops_with_norm, { node_idx + 3, node_idx + 8 })) {
         ggml_tensor * softmax = cgraph->nodes[node_idx];
         ggml_tensor * weights = cgraph->nodes[node_idx+8];
 
@@ -2833,7 +2832,7 @@ static bool ggml_cuda_can_fuse(const struct ggml_cgraph * cgraph, int node_idx,
     }
 
     if (ops.size() == topk_moe_ops.size() &&
-        ggml_can_fuse_subgraph(cgraph, node_idx, topk_moe_ops, { node_idx }, { node_idx + 3, node_idx + 4 })) {
+        ggml_can_fuse_subgraph(cgraph, node_idx, topk_moe_ops, { node_idx + 3, node_idx + 4 })) {
         ggml_tensor * softmax = cgraph->nodes[node_idx];
         ggml_tensor * weights = cgraph->nodes[node_idx+4];
         if (ggml_cuda_should_use_topk_moe(softmax, weights)) {
diff --git a/ggml/src/ggml-impl.h b/ggml/src/ggml-impl.h
@@ -651,20 +651,16 @@ GGML_API bool ggml_can_fuse_subgraph_ext(const struct ggml_cgraph * cgraph,
                                          const int *                node_idxs,
                                          int                        count,
                                          const enum ggml_op *       ops,
-                                         const int *                inputs,
-                                         int                        num_inputs,
                                          const int *                outputs,
                                          int                        num_outputs);
 
 // Returns true if the subgraph formed by {node_idxs} can be fused
-// checks whethers all nodes which are not part of inputs/outputs can be elided
+// checks whether all nodes which are not part of outputs can be elided
 // by checking if their num_uses are confined to the subgraph
 static inline bool ggml_can_fuse_subgraph(const struct ggml_cgraph * cgraph,
                                           int                        node_idx,
                                           int                        count,
                                           const enum ggml_op *       ops,
-                                          const int *                inputs,
-                                          int                        num_inputs,
                                           const int *                outputs,
                                           int                        num_outputs) {
     if (node_idx + count > cgraph->n_nodes) {
@@ -677,7 +673,7 @@ static inline bool ggml_can_fuse_subgraph(const struct ggml_cgraph * cgraph,
         idxs[i] = node_idx + i;
     }
 
-    return ggml_can_fuse_subgraph_ext(cgraph, idxs, count, ops, inputs, num_inputs, outputs, num_outputs);
+    return ggml_can_fuse_subgraph_ext(cgraph, idxs, count, ops, outputs, num_outputs);
 }
 
 #ifdef __cplusplus
@@ -696,10 +692,8 @@ inline bool ggml_can_fuse(const struct ggml_cgraph * cgraph, int node_idx, std::
 inline bool ggml_can_fuse_subgraph(const struct ggml_cgraph *          cgraph,
                                    int                                 start_idx,
                                    std::initializer_list<enum ggml_op> ops,
-                                   std::initializer_list<int>          inputs  = {},
                                    std::initializer_list<int>          outputs = {}) {
-    return ggml_can_fuse_subgraph(cgraph, start_idx, ops.size(), ops.begin(), inputs.begin(), inputs.size(),
-                                  outputs.begin(), outputs.size());
+    return ggml_can_fuse_subgraph(cgraph, start_idx, ops.size(), ops.begin(), outputs.begin(), outputs.size());
 }
 
 // expose GGUF internals for test code
diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c
@@ -6989,11 +6989,9 @@ bool ggml_can_fuse_subgraph_ext(const struct ggml_cgraph * cgraph,
                                 const int *                node_idxs,
                                 int                        count,
                                 const enum ggml_op *       ops,
-                                const int *                inputs,
-                                int                        num_inputs,
                                 const int *                outputs,
                                 int                        num_outputs) {
-    GGML_ASSERT(count < 32 && num_inputs > 0 && num_outputs > 0);
+    GGML_ASSERT(count < 32 && outputs && num_outputs > 0);
     int interior_nodes_count = 0;
     int interior_nodes[32];
 
@@ -7008,25 +7006,20 @@ bool ggml_can_fuse_subgraph_ext(const struct ggml_cgraph * cgraph,
             return false;
         }
 
-        if (ggml_find_tensor_node_list(cgraph, inputs, num_inputs, node) != -1) {
-            continue;
-        }
-
         if (ggml_find_tensor_node_list(cgraph, outputs, num_outputs, node) != -1) {
             continue;
         }
 
         interior_nodes[interior_nodes_count++] = node_idxs[i];
     }
 
-    // if interior-node has n-uses, ensure that all of them lie within in this subgraph
     for (int i = 0; i < interior_nodes_count; ++i) {
         const int num_uses = ggml_node_get_use_count(cgraph, interior_nodes[i]);
 
         const struct ggml_tensor * node = cgraph->nodes[interior_nodes[i]];
 
+        // if interior-node has n-uses, ensure that all of them lie within this subgraph
         int subgraph_uses = 0;
-        //check if all uses are within the graph
         for (int j = 0; j < count; ++j) {
             const struct ggml_tensor * other_node = cgraph->nodes[node_idxs[j]];
             for (int src_idx = 0; src_idx < GGML_MAX_SRC; src_idx++) {
@@ -7039,6 +7032,14 @@ bool ggml_can_fuse_subgraph_ext(const struct ggml_cgraph * cgraph,
         if (subgraph_uses != num_uses) {
             return false;
         }
+
+        // if node is a view, check if the view src is within the subgraph
+        if (node->view_src) {
+            const struct ggml_tensor * view_src = node->view_src;
+            if (ggml_find_tensor_node_list(cgraph, node_idxs, count, view_src) == -1) {
+                return false;
+            }
+        }
     }
 
     return true;