Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions examples/common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,8 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
}
} else if (arg == "-tt" || arg == "--token_test") {
params.token_test = get_next_arg(i, argc, argv, arg, params);
} else if (arg == "--perplexity") {
params.perplexity = true;
}
else {
fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
Expand Down Expand Up @@ -112,6 +114,7 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
fprintf(stderr, " --repeat-penalty N penalize repeat sequence of tokens (default: %.2f, 1.0 = disabled)\n", (double)params.repeat_penalty);
fprintf(stderr, " -b N, --batch_size N batch size for prompt processing (default: %d)\n", params.n_batch);
fprintf(stderr, " -m FNAME, --model FNAME\n");
fprintf(stderr, " --perplexity compute perplexity over the prompt\n");
fprintf(stderr, " model path (default: %s)\n", params.model.c_str());
fprintf(stderr, "\n");
}
Expand Down Expand Up @@ -765,6 +768,22 @@ float similarity(const std::string & s0, const std::string & s1) {
return 1.0f - (dist / std::max(s0.size(), s1.size()));
}

// Convert raw logits into a probability distribution using the numerically
// stable softmax: probs[i] = exp(logits[i] - max) / sum_j exp(logits[j] - max).
// Subtracting the maximum logit keeps expf from overflowing for large inputs.
// Returns an empty vector for empty input (the original read logits[0]
// unconditionally, which is undefined behavior on an empty vector).
std::vector<float> softmax(const std::vector<float> & logits) {
    std::vector<float> probs(logits.size());
    if (logits.empty()) {
        return probs;
    }
    // Stability shift: the largest logit maps to exp(0) = 1.
    const float max_logit = *std::max_element(logits.begin(), logits.end());
    // Accumulate in double to reduce rounding error over large vocabularies.
    double sum_exp = 0.0;
    for (size_t i = 0; i < logits.size(); i++) {
        const float exp_logit = expf(logits[i] - max_logit);
        sum_exp += exp_logit;
        probs[i] = exp_logit;
    }
    // Normalize so the probabilities sum to 1.
    for (size_t i = 0; i < probs.size(); i++) {
        probs[i] /= sum_exp;
    }
    return probs;
}

bool sam_params_parse(int argc, char ** argv, sam_params & params) {
for (int i = 1; i < argc; i++) {
std::string arg = argv[i];
Expand Down
5 changes: 5 additions & 0 deletions examples/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ struct gpt_params {
int32_t interactive_port = -1;

int32_t n_gpu_layers = 0;

bool perplexity = false;
};

bool gpt_params_parse(int argc, char ** argv, gpt_params & params);
Expand Down Expand Up @@ -158,6 +160,9 @@ bool vad_simple(
// compute similarity between two strings using Levenshtein distance
float similarity(const std::string & s0, const std::string & s1);

// softmax for ppl
std::vector<float> softmax(const std::vector<float> & logits);

//
// SAM argument parsing
//
Expand Down
20 changes: 19 additions & 1 deletion examples/gpt-2/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -803,7 +803,14 @@ int main(int argc, char ** argv) {
// tokenize the prompt
std::vector<gpt_vocab::id> embd_inp = ::gpt_tokenize(vocab, params.prompt);

params.n_predict = std::min(params.n_predict, model.hparams.n_ctx - (int) embd_inp.size());
if (params.perplexity) {
// NOTE(xcsong): We only calculate perplexity on prompt (n_predict = 0).
// To get logits, we need to process prompt token-by-token (n_batch = 1).
params.n_predict = 0;
params.n_batch = 1;
} else {
params.n_predict = std::min(params.n_predict, model.hparams.n_ctx - (int) embd_inp.size());
}

printf("%s: prompt: '%s'\n", __func__, params.prompt.c_str());
printf("%s: number of tokens in prompt = %zu, first 8 tokens: ", __func__, embd_inp.size());
Expand All @@ -816,6 +823,7 @@ int main(int argc, char ** argv) {
// this reduces the memory usage during inference, at the cost of a bit of speed at the beginning
std::vector<gpt_vocab::id> embd;

double nll = 0.0; // for perplexity
for (size_t i = embd.size(); i < embd_inp.size() + params.n_predict; i++) {
// predict
if (embd.size() > 0) {
Expand All @@ -827,6 +835,11 @@ int main(int argc, char ** argv) {
}

t_predict_us += ggml_time_us() - t_start_us;

if (params.perplexity) {
const float prob = softmax(logits)[embd_inp[i]];
nll += -std::log(prob);
}
}

n_past += embd.size();
Expand Down Expand Up @@ -875,6 +888,11 @@ int main(int argc, char ** argv) {
}
}

if (params.perplexity) {
// perplexity is e^(average negative log-likelihood)
printf("\n\nperplexity:%.8lf\n", std::exp(nll / (embd_inp.size() - 1)));
}

// report timing
{
const int64_t t_main_end_us = ggml_time_us();
Expand Down
16 changes: 0 additions & 16 deletions examples/mpt/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -681,22 +681,6 @@ bool mpt_eval(const mpt_model & model, const int n_threads, const int n_past,
return true;
}

// Numerically stable softmax: shift every logit by the largest one before
// exponentiating so that expf never overflows, then normalize so the
// outputs form a probability distribution that sums to 1.
std::vector<float> softmax(const std::vector<float> & logits) {
    // Find the stability shift (the maximum logit).
    float shift = logits[0];
    for (size_t i = 1; i < logits.size(); i++) {
        shift = std::max(shift, logits[i]);
    }
    std::vector<float> result(logits.size());
    // Accumulate the partition sum in double for better precision.
    double total = 0.0;
    for (size_t i = 0; i < logits.size(); i++) {
        result[i] = expf(logits[i] - shift);
        total += result[i];
    }
    // Scale each exponentiated value by the partition sum.
    for (float & p : result) {
        p /= total;
    }
    return result;
}

int perplexity(const mpt_params & params) {
ggml_time_init();

Expand Down