 //

 struct gpt_params {
-    int32_t seed      = -1; // RNG seed
-    int32_t n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency());
-    int32_t n_predict = 128; // new tokens to predict
+    int32_t seed          = -1;  // RNG seed
+    int32_t n_threads     = std::min(4, (int32_t) std::thread::hardware_concurrency());
+    int32_t n_predict     = 128; // new tokens to predict
     int32_t repeat_last_n = 64;  // last n tokens to penalize
-    int32_t n_ctx = 512; //context size
-    bool memory_f16 = false; // use f16 instead of f32 for memory kv
+    int32_t n_ctx         = 512; //context size

     // sampling parameters
     int32_t top_k = 40;
     float   top_p = 0.95f;
     float   temp  = 0.80f;
-    float   repeat_penalty = 1.30f;
+    float   repeat_penalty = 1.10f;

     int32_t n_batch = 8; // batch size for prompt processing

-    std::string model = "models/lamma-7B/ggml-model.bin"; // model path
-    std::string prompt = "";
+    std::string model  = "models/lamma-7B/ggml-model.bin"; // model path
+    std::string prompt = "";

-    bool random_prompt = false;
-
-    bool use_color = false; // use color to distinguish generations and inputs
+    std::vector<std::string> antiprompt; // string upon seeing which more user input is prompted

-    bool interactive = false; // interactive mode
+    bool memory_f16        = false; // use f16 instead of f32 for memory kv
+    bool random_prompt     = false; // do not randomize prompt if none provided
+    bool use_color         = false; // use color to distinguish generations and inputs
+    bool interactive       = false; // interactive mode
     bool interactive_start = false; // reverse prompt immediately
-    std::vector<std::string> antiprompt; // string upon seeing which more user input is prompted
-    bool instruct = false; // instruction mode (used for Alpaca models)
-    bool ignore_eos = false; // do not stop generating after eos
+    bool instruct          = false; // instruction mode (used for Alpaca models)
+    bool ignore_eos        = false; // do not stop generating after eos
 };

 bool gpt_params_parse(int argc, char ** argv, gpt_params & params);
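The one behavioral change in this diff is the default repeat_penalty dropping from 1.30f to 1.10f, a much milder setting; the rest reorders and aligns the fields and documents random_prompt. As a point of reference, below is a minimal sketch of how a multiplicative repetition penalty of this form is typically applied to the logits of the last repeat_last_n tokens before sampling; the helper name is hypothetical and not part of this header:

#include <cstdint>
#include <vector>

// Hypothetical helper: dampen the logits of recently generated tokens.
// Dividing a positive logit by the penalty (or multiplying a negative one)
// makes the token strictly less likely in both cases, so values just above
// 1.0f, such as the new 1.10f default, nudge the model away from loops
// without forbidding repeats outright.
void apply_repeat_penalty(std::vector<float> & logits,
                          const std::vector<int32_t> & last_tokens,
                          float repeat_penalty) {
    for (int32_t id : last_tokens) {
        if (logits[id] > 0.0f) {
            logits[id] /= repeat_penalty;
        } else {
            logits[id] *= repeat_penalty;
        }
    }
}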
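gpt_params_parse is declared to fill the struct from argv, returning a bool, presumably false when parsing fails. A minimal, illustrative call site, assuming this header is included (the main below is a sketch, not code from this diff):

// Illustrative call site: populate gpt_params from the command line,
// falling back to the defaults declared above for any flag not given.
int main(int argc, char ** argv) {
    gpt_params params;

    if (!gpt_params_parse(argc, argv, params)) {
        return 1; // parsing failed
    }

    // params.model, params.prompt, params.repeat_penalty, ... are now set
    // from the defaults plus any command-line overrides.
    return 0;
}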