Skip to content
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions examples/server/server.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2012,6 +2012,10 @@ static void server_print_usage(const char *argv0, const gpt_params &params,
printf(" --yarn-attn-factor N YaRN: scale sqrt(t) or attention magnitude (default: 1.0)\n");
printf(" --yarn-beta-slow N YaRN: high correction dim or alpha (default: %.1f)\n", params.yarn_beta_slow);
printf(" --yarn-beta-fast N YaRN: low correction dim or beta (default: %.1f)\n", params.yarn_beta_fast);
printf(" --grp-attn-n N\n");
printf(" group-attention factor (default: %d)\n", params.grp_attn_n);
printf(" --grp-attn-w N\n");
printf(" group-attention width (default: %.1f)\n", (double)params.grp_attn_w);
printf(" -b N, --batch-size N batch size for prompt processing (default: %d)\n", params.n_batch);
printf(" --memory-f32 use f32 instead of f16 for memory key+value (default: disabled)\n");
printf(" not recommended: doubles context memory required and no measurable increase in quality\n");
Expand Down Expand Up @@ -2236,6 +2240,24 @@ static void server_params_parse(int argc, char **argv, server_params &sparams,
}
params.yarn_beta_slow = std::stof(argv[i]);
}
else if (arg == "--grp-attn-n")
{
if (++i >= argc)
{
invalid_param = true;
break;
}
params.grp_attn_n = std::stoi(argv[i]);
}
else if (arg == "--grp-attn-w")
{
if (++i >= argc)
{
invalid_param = true;
break;
}
params.grp_attn_w = std::stoi(argv[i]);
}
else if (arg == "--threads" || arg == "-t")
{
if (++i >= argc)
Expand Down