diff --git a/app/CMakeLists.txt b/app/CMakeLists.txt index 2dddff9d407..6c53ce0e4e2 100644 --- a/app/CMakeLists.txt +++ b/app/CMakeLists.txt @@ -3,7 +3,16 @@ set(TARGET llama-app) add_executable(${TARGET} llama.cpp) set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME llama) -target_link_libraries(${TARGET} PRIVATE llama-server-impl llama-cli-impl llama-completion-impl llama-bench-impl) +target_link_libraries(${TARGET} PRIVATE + llama-server-impl + llama-cli-impl + llama-completion-impl + llama-bench-impl + llama-batched-bench-impl + llama-fit-params-impl + llama-quantize-impl + llama-perplexity-impl +) target_compile_features(${TARGET} PRIVATE cxx_std_17) if(LLAMA_TOOLS_INSTALL) diff --git a/app/llama.cpp b/app/llama.cpp index 55aa8ca5ee0..e149975d28c 100644 --- a/app/llama.cpp +++ b/app/llama.cpp @@ -4,12 +4,18 @@ #include #include +// visible int llama_server(int argc, char ** argv); int llama_cli(int argc, char ** argv); // hidden int llama_completion(int argc, char ** argv); int llama_bench(int argc, char ** argv); +int llama_batched_bench(int argc, char ** argv); +int llama_fit_params(int argc, char ** argv); +int llama_quantize(int argc, char ** argv); +int llama_perplexity(int argc, char ** argv); + static int help(int argc, char ** argv); static int version(int argc, char ** argv); @@ -22,12 +28,16 @@ struct command { }; static const command cmds[] = { - {"serve", "HTTP API server", {"server"}, false, llama_server }, - {"cli", "Command-line interactive interface", {"client"}, false, llama_cli }, - {"completion", "Text completion", {"complete"}, true, llama_completion }, - {"bench", "Benchmarking tool", {}, true, llama_bench }, - {"version", "Show version", {}, true, version }, - {"help", "Show available commands", {}, true, help }, + {"serve", "HTTP API server", {"server"}, false, llama_server }, + {"cli", "Command-line interactive interface", {"client"}, false, llama_cli }, + {"completion", "Text completion", {"complete"}, true, llama_completion }, + {"bench", "Benchmark prompt processing and text generation", {}, true, llama_bench }, + {"batched-bench", "Benchmark batched decoding performance", {}, true, llama_batched_bench}, + {"fit-params", "Compute parameters to fit a model in device memory", {}, true, llama_fit_params }, + {"quantize", "Quantize a model", {}, true, llama_quantize }, + {"perplexity", "Compute model perplexity and KL divergence", {}, true, llama_perplexity }, + {"version", "Show version", {}, true, version }, + {"help", "Show available commands", {}, true, help }, }; static int version(int argc, char ** argv) { diff --git a/tools/batched-bench/CMakeLists.txt b/tools/batched-bench/CMakeLists.txt index f9ffd2d4ce7..1769c2136b1 100644 --- a/tools/batched-bench/CMakeLists.txt +++ b/tools/batched-bench/CMakeLists.txt @@ -1,6 +1,18 @@ +# llama-batched-bench-impl: batched-bench logic, reusable by app + +set(TARGET llama-batched-bench-impl) + +add_library(${TARGET} STATIC batched-bench.cpp) + +target_include_directories(${TARGET} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) +target_link_libraries(${TARGET} PUBLIC llama-common llama ${CMAKE_THREAD_LIBS_INIT}) + +# llama-batched-bench executable + set(TARGET llama-batched-bench) -add_executable(${TARGET} batched-bench.cpp) -target_link_libraries(${TARGET} PRIVATE llama-common llama ${CMAKE_THREAD_LIBS_INIT}) + +add_executable(${TARGET} main.cpp) +target_link_libraries(${TARGET} PRIVATE llama-batched-bench-impl) target_compile_features(${TARGET} PRIVATE cxx_std_17) if(LLAMA_TOOLS_INSTALL) diff --git a/tools/batched-bench/batched-bench.cpp b/tools/batched-bench/batched-bench.cpp index 3964ef25955..e2dcd0b2e71 100644 --- a/tools/batched-bench/batched-bench.cpp +++ b/tools/batched-bench/batched-bench.cpp @@ -15,7 +15,10 @@ static void print_usage(int, char ** argv) { LOG("\n"); } -int main(int argc, char ** argv) { +// satisfies -Wmissing-declarations +int llama_batched_bench(int argc, char ** argv); + +int llama_batched_bench(int argc, char ** argv) { std::setlocale(LC_NUMERIC, "C"); common_params params; diff --git a/tools/batched-bench/main.cpp b/tools/batched-bench/main.cpp new file mode 100644 index 00000000000..958cfc5b31c --- /dev/null +++ b/tools/batched-bench/main.cpp @@ -0,0 +1,5 @@ +int llama_batched_bench(int argc, char ** argv); + +int main(int argc, char ** argv) { + return llama_batched_bench(argc, argv); +} diff --git a/tools/fit-params/CMakeLists.txt b/tools/fit-params/CMakeLists.txt index 25c40966333..207caf2ceda 100644 --- a/tools/fit-params/CMakeLists.txt +++ b/tools/fit-params/CMakeLists.txt @@ -1,6 +1,18 @@ +# llama-fit-params-impl: fit-params logic, reusable by app + +set(TARGET llama-fit-params-impl) + +add_library(${TARGET} STATIC fit-params.cpp) + +target_include_directories(${TARGET} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) +target_link_libraries(${TARGET} PUBLIC llama-common llama ${CMAKE_THREAD_LIBS_INIT}) + +# llama-fit-params executable + set(TARGET llama-fit-params) -add_executable(${TARGET} fit-params.cpp) -target_link_libraries(${TARGET} PRIVATE llama-common llama ${CMAKE_THREAD_LIBS_INIT}) + +add_executable(${TARGET} main.cpp) +target_link_libraries(${TARGET} PRIVATE llama-fit-params-impl) target_compile_features(${TARGET} PRIVATE cxx_std_17) if(LLAMA_TOOLS_INSTALL) diff --git a/tools/fit-params/fit-params.cpp b/tools/fit-params/fit-params.cpp index 20a5ff1ebd0..5d897bc4669 100644 --- a/tools/fit-params/fit-params.cpp +++ b/tools/fit-params/fit-params.cpp @@ -12,7 +12,10 @@ #pragma warning(disable: 4244 4267) // possible loss of data #endif -int main(int argc, char ** argv) { +// satisfies -Wmissing-declarations +int llama_fit_params(int argc, char ** argv); + +int llama_fit_params(int argc, char ** argv) { common_params params; common_init(); diff --git a/tools/fit-params/main.cpp b/tools/fit-params/main.cpp new file mode 100644 index 00000000000..b7271d4756a --- /dev/null +++ b/tools/fit-params/main.cpp @@ -0,0 +1,5 @@ +int llama_fit_params(int argc, char ** argv); + +int main(int argc, char ** argv) { + return llama_fit_params(argc, argv); +} diff --git a/tools/perplexity/CMakeLists.txt b/tools/perplexity/CMakeLists.txt index 0c194ee7f08..44061d0a551 100644 --- a/tools/perplexity/CMakeLists.txt +++ b/tools/perplexity/CMakeLists.txt @@ -1,6 +1,18 @@ +# llama-perplexity-impl: perplexity logic, reusable by app + +set(TARGET llama-perplexity-impl) + +add_library(${TARGET} STATIC perplexity.cpp) + +target_include_directories(${TARGET} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) +target_link_libraries(${TARGET} PUBLIC llama-common llama ${CMAKE_THREAD_LIBS_INIT}) + +# llama-perplexity executable + set(TARGET llama-perplexity) -add_executable(${TARGET} perplexity.cpp) -target_link_libraries(${TARGET} PRIVATE llama-common llama ${CMAKE_THREAD_LIBS_INIT}) + +add_executable(${TARGET} main.cpp) +target_link_libraries(${TARGET} PRIVATE llama-perplexity-impl) target_compile_features(${TARGET} PRIVATE cxx_std_17) if(LLAMA_TOOLS_INSTALL) diff --git a/tools/perplexity/main.cpp b/tools/perplexity/main.cpp new file mode 100644 index 00000000000..13a9940e9ee --- /dev/null +++ b/tools/perplexity/main.cpp @@ -0,0 +1,5 @@ +int llama_perplexity(int argc, char ** argv); + +int main(int argc, char ** argv) { + return llama_perplexity(argc, argv); +} diff --git a/tools/perplexity/perplexity.cpp b/tools/perplexity/perplexity.cpp index 75defd7c87b..f66576eb404 100644 --- a/tools/perplexity/perplexity.cpp +++ b/tools/perplexity/perplexity.cpp @@ -2005,7 +2005,10 @@ static void kl_divergence(llama_context * ctx, const common_params & params) { LOG("Same top p: %6.3lf ± %5.3lf %%\n", 100.0*same_top_p, 100.0*sqrt(same_top_p*(1.0 - same_top_p)/(kld.count - 1))); } -int main(int argc, char ** argv) { +// satisfies -Wmissing-declarations +int llama_perplexity(int argc, char ** argv); + +int llama_perplexity(int argc, char ** argv) { std::setlocale(LC_NUMERIC, "C"); common_params params; diff --git a/tools/quantize/CMakeLists.txt b/tools/quantize/CMakeLists.txt index 965adc0059b..e76f7d81108 100644 --- a/tools/quantize/CMakeLists.txt +++ b/tools/quantize/CMakeLists.txt @@ -1,7 +1,18 @@ +# llama-quantize-impl: quantize logic, reusable by app + +set(TARGET llama-quantize-impl) + +add_library(${TARGET} STATIC quantize.cpp) + +target_include_directories(${TARGET} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) +target_link_libraries(${TARGET} PUBLIC llama-common llama ${CMAKE_THREAD_LIBS_INIT}) + +# llama-quantize executable + set(TARGET llama-quantize) -add_executable(${TARGET} quantize.cpp) -target_link_libraries(${TARGET} PRIVATE llama-common llama ${CMAKE_THREAD_LIBS_INIT}) -target_include_directories(${TARGET} PRIVATE ../../common) + +add_executable(${TARGET} main.cpp) +target_link_libraries(${TARGET} PRIVATE llama-quantize-impl) target_compile_features(${TARGET} PRIVATE cxx_std_17) if(LLAMA_TOOLS_INSTALL) diff --git a/tools/quantize/main.cpp b/tools/quantize/main.cpp new file mode 100644 index 00000000000..fc247190c83 --- /dev/null +++ b/tools/quantize/main.cpp @@ -0,0 +1,5 @@ +int llama_quantize(int argc, char ** argv); + +int main(int argc, char ** argv) { + return llama_quantize(argc, argv); +} diff --git a/tools/quantize/quantize.cpp b/tools/quantize/quantize.cpp index 3d33d47d98b..7292bda6f4e 100644 --- a/tools/quantize/quantize.cpp +++ b/tools/quantize/quantize.cpp @@ -490,7 +490,10 @@ static bool parse_layer_prune(const char * data, std::vector & prune_layers return true; } -int main(int argc, char ** argv) { +// satisfies -Wmissing-declarations +int llama_quantize(int argc, char ** argv); + +int llama_quantize(int argc, char ** argv) { std::setlocale(LC_NUMERIC, "C"); if (argc < 3) { usage(argv[0]);