Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 2 additions & 56 deletions src/models/models.h
Original file line number Diff line number Diff line change
Expand Up @@ -542,8 +542,7 @@ struct llm_build_qwen3next : public llm_build_delta_net_base {
const llama_model & model;
};

// TODO: derive llm_build_delta_net_base instead
struct llm_build_qwen35 : public llm_graph_context {
struct llm_build_qwen35 : public llm_build_delta_net_base {
llm_build_qwen35(const llama_model & model, const llm_graph_params & params);
private:
ggml_tensor * build_layer_attn(
Expand All @@ -556,39 +555,12 @@ struct llm_build_qwen35 : public llm_graph_context {
ggml_tensor * build_layer_attn_linear(
llm_graph_input_rs * inp,
ggml_tensor * cur,
ggml_tensor * causal_mask,
ggml_tensor * identity,
ggml_tensor * diag_mask,
int il);


ggml_tensor * build_layer_ffn(
ggml_tensor * cur,
int il);

// returns pair of output and new state
std::pair<ggml_tensor *, ggml_tensor *> build_delta_net_chunking(
ggml_tensor * q,
ggml_tensor * k,
ggml_tensor * v,
ggml_tensor * g,
ggml_tensor * beta,
ggml_tensor * state,
ggml_tensor * causal_mask,
ggml_tensor * identity,
ggml_tensor * diag_mask,
int il);

// returns pair of output and new state
std::pair<ggml_tensor *, ggml_tensor *> build_delta_net_autoregressive(
ggml_tensor * q,
ggml_tensor * k,
ggml_tensor * v,
ggml_tensor * g,
ggml_tensor * beta,
ggml_tensor * state,
int il);

ggml_tensor * build_norm_gated(
ggml_tensor * input,
ggml_tensor * weights,
Expand All @@ -604,7 +576,7 @@ struct llm_build_qwen35 : public llm_graph_context {
};

// TODO: derive llm_build_delta_net_base instead
struct llm_build_qwen35moe : public llm_graph_context {
struct llm_build_qwen35moe : public llm_build_delta_net_base {
llm_build_qwen35moe(const llama_model & model, const llm_graph_params & params);
private:
ggml_tensor * build_layer_attn(
Expand All @@ -617,38 +589,12 @@ struct llm_build_qwen35moe : public llm_graph_context {
ggml_tensor * build_layer_attn_linear(
llm_graph_input_rs * inp,
ggml_tensor * cur,
ggml_tensor * causal_mask,
ggml_tensor * identity,
ggml_tensor * diag_mask,
int il);

ggml_tensor * build_layer_ffn(
ggml_tensor * cur,
int il);

// returns pair of output and new state
std::pair<ggml_tensor *, ggml_tensor *> build_delta_net_chunking(
ggml_tensor * q,
ggml_tensor * k,
ggml_tensor * v,
ggml_tensor * g,
ggml_tensor * beta,
ggml_tensor * state,
ggml_tensor * causal_mask,
ggml_tensor * identity,
ggml_tensor * diag_mask,
int il);

// returns pair of output and new state
std::pair<ggml_tensor *, ggml_tensor *> build_delta_net_autoregressive(
ggml_tensor * q,
ggml_tensor * k,
ggml_tensor * v,
ggml_tensor * g,
ggml_tensor * beta,
ggml_tensor * state,
int il);

ggml_tensor * build_norm_gated(
ggml_tensor * input,
ggml_tensor * weights,
Expand Down
Loading
Loading