@@ -441,6 +441,8 @@ struct test {
441441 static const std::string gpu_info;
442442 std::string model_filename;
443443 std::string model_type;
444+ uint64_t model_size;
445+ uint64_t model_n_params;
444446 int n_batch;
445447 int n_threads;
446448 bool f32_kv;
@@ -457,8 +459,10 @@ struct test {
457459 test (const cmd_params_instance & inst, const llama_model * lmodel, const llama_context * ctx) {
458460 model_filename = inst.model ;
459461 char buf[128 ];
460- llama_model_type (lmodel, buf, sizeof (buf));
462+ llama_model_desc (lmodel, buf, sizeof (buf));
461463 model_type = buf;
464+ model_size = llama_model_size (lmodel);
465+ model_n_params = llama_model_n_params (lmodel);
462466 n_batch = inst.n_batch ;
463467 n_threads = inst.n_threads ;
464468 f32_kv = inst.f32_kv ;
@@ -524,7 +528,7 @@ struct test {
524528 " build_commit" , " build_number" ,
525529 " cuda" , " opencl" , " metal" , " gpu_blas" , " blas" ,
526530 " cpu_info" , " gpu_info" ,
527- " model_filename" , " model_type" ,
531+ " model_filename" , " model_type" , " model_size " , " model_n_params " ,
528532 " n_batch" , " n_threads" , " f16_kv" ,
529533 " n_gpu_layers" , " main_gpu" , " mul_mat_q" , " low_vram" , " tensor_split" ,
530534 " n_prompt" , " n_gen" , " test_time" ,
@@ -538,6 +542,7 @@ struct test {
538542
539543 static field_type get_field_type (const std::string & field) {
540544 if (field == " build_number" || field == " n_batch" || field == " n_threads" ||
545+ field == " model_size" || field == " model_n_params" ||
541546 field == " n_gpu_layers" || field == " main_gpu" ||
542547 field == " n_prompt" || field == " n_gen" ||
543548 field == " avg_ns" || field == " stddev_ns" ) {
@@ -573,7 +578,7 @@ struct test {
573578 build_commit, std::to_string (build_number),
574579 std::to_string (cuda), std::to_string (opencl), std::to_string (metal), std::to_string (gpu_blas), std::to_string (blas),
575580 cpu_info, gpu_info,
576- model_filename, model_type,
581+ model_filename, model_type, std::to_string (model_size), std::to_string (model_n_params),
577582 std::to_string (n_batch), std::to_string (n_threads), std::to_string (!f32_kv),
578583 std::to_string (n_gpu_layers), std::to_string (main_gpu), std::to_string (mul_mat_q), std::to_string (low_vram), tensor_split_str,
579584 std::to_string (n_prompt), std::to_string (n_gen), test_time,
@@ -709,8 +714,15 @@ struct markdown_printer : public printer {
709714 return -30 ;
710715 }
711716 if (field == " t/s" ) {
712- return 15 ;
717+ return 16 ;
713718 }
719+ if (field == " size" || field == " params" ) {
720+ return 10 ;
721+ }
722+ if (field == " n_gpu_layers" ) {
723+ return 3 ;
724+ }
725+
714726 int width = std::max ((int )field.length (), 10 );
715727
716728 if (test::get_field_type (field) == test::STRING) {
@@ -719,9 +731,28 @@ struct markdown_printer : public printer {
719731 return width;
720732 }
721733
/**
 * Map an internal field name to the shorter label shown as a column header.
 *
 * A handful of long field names are abbreviated so the table columns stay
 * narrow; every other field name is returned unchanged.
 *
 * @param field  internal field name (exact, case-sensitive match; no
 *               trimming is performed).
 * @return the abbreviated label if one is defined, otherwise `field` itself.
 */
static std::string get_field_display_name(const std::string & field) {
    // Alias table: internal field name -> header label.
    // The literals must match the field names exactly, with no surrounding
    // whitespace, otherwise the lookup silently falls through to `field`.
    static const struct {
        const char * name;
        const char * display;
    } aliases[] = {
        { "n_gpu_layers", "ngl"     },
        { "n_threads",    "threads" },
        { "mul_mat_q",    "mmq"     },
        { "tensor_split", "ts"      },
    };

    for (const auto & alias : aliases) {
        if (field == alias.name) {
            return alias.display;
        }
    }

    // No alias defined: the field name doubles as its own header.
    return field;
}
749+
722750 void print_header (const cmd_params & params) override {
723751 // select fields to print
724- fields = { " model" , " backend" };
752+ fields.push_back (" model" );
753+ fields.push_back (" size" );
754+ fields.push_back (" params" );
755+ fields.push_back (" backend" );
725756 bool is_cpu_backend = test::get_backend () == " CPU" || test::get_backend () == " BLAS" ;
726757 if (!is_cpu_backend) {
727758 fields.push_back (" n_gpu_layers" );
@@ -752,7 +783,7 @@ struct markdown_printer : public printer {
752783
753784 fprintf (fout, " |" );
754785 for (const auto & field : fields) {
755- fprintf (fout, " %*s |" , get_field_width (field), field.c_str ());
786+ fprintf (fout, " %*s |" , get_field_width (field), get_field_display_name ( field) .c_str ());
756787 }
757788 fprintf (fout, " \n " );
758789 fprintf (fout, " |" );
@@ -769,12 +800,26 @@ struct markdown_printer : public printer {
769800 fprintf (fout, " |" );
770801 for (const auto & field : fields) {
771802 std::string value;
803+ char buf[128 ];
772804 if (field == " model" ) {
773805 value = t.model_type ;
806+ } else if (field == " size" ) {
807+ if (t.model_size < 1024 *1024 *1024 ) {
808+ snprintf (buf, sizeof (buf), " %.2f MiB" , t.model_size / 1024.0 / 1024.0 );
809+ } else {
810+ snprintf (buf, sizeof (buf), " %.2f GiB" , t.model_size / 1024.0 / 1024.0 / 1024.0 );
811+ }
812+ value = buf;
813+ } else if (field == " params" ) {
814+ if (t.model_n_params < 1000 *1000 *1000 ) {
815+ snprintf (buf, sizeof (buf), " %.2f M" , t.model_n_params / 1e6 );
816+ } else {
817+ snprintf (buf, sizeof (buf), " %.2f B" , t.model_n_params / 1e9 );
818+ }
819+ value = buf;
774820 } else if (field == " backend" ) {
775821 value = test::get_backend ();
776822 } else if (field == " test" ) {
777- char buf[128 ];
778823 if (t.n_prompt > 0 && t.n_gen == 0 ) {
779824 snprintf (buf, sizeof (buf), " pp %d" , t.n_prompt );
780825 } else if (t.n_gen > 0 && t.n_prompt == 0 ) {
@@ -785,7 +830,6 @@ struct markdown_printer : public printer {
785830 }
786831 value = buf;
787832 } else if (field == " t/s" ) {
788- char buf[128 ];
789833 snprintf (buf, sizeof (buf), " %.2f ± %.2f" , t.avg_ts (), t.stdev_ts ());
790834 value = buf;
791835 } else if (vmap.find (field) != vmap.end ()) {
0 commit comments