Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion app/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,16 @@ set(TARGET llama-app)
add_executable(${TARGET} llama.cpp)
set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME llama)

target_link_libraries(${TARGET} PRIVATE llama-server-impl llama-cli-impl llama-completion-impl llama-bench-impl)
# Link every tool implementation library into the unified binary; app/llama.cpp
# declares one entry point per tool and dispatches to it by command name.
target_link_libraries(${TARGET} PRIVATE
llama-server-impl
llama-cli-impl
llama-completion-impl
llama-bench-impl
llama-batched-bench-impl
llama-fit-params-impl
llama-quantize-impl
llama-perplexity-impl
)
target_compile_features(${TARGET} PRIVATE cxx_std_17)

if(LLAMA_TOOLS_INSTALL)
Expand Down
22 changes: 16 additions & 6 deletions app/llama.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,18 @@
#include <string>
#include <vector>

// Tool entry points. They are only declared here; the definitions live outside
// this file and each takes the usual (argc, argv) pair and returns an int.

// visible
// NOTE(review): presumably the commands shown in the default help listing - confirm against struct command
int llama_server(int argc, char ** argv);
int llama_cli(int argc, char ** argv);

// hidden
// NOTE(review): presumably still runnable by name but not advertised - confirm against struct command
int llama_completion(int argc, char ** argv);
int llama_bench(int argc, char ** argv);
int llama_batched_bench(int argc, char ** argv);
int llama_fit_params(int argc, char ** argv);
int llama_quantize(int argc, char ** argv);
int llama_perplexity(int argc, char ** argv);

// Built-in commands with internal linkage, forward-declared so the command
// table below can reference them before their definitions.
static int help(int argc, char ** argv);
static int version(int argc, char ** argv);

Expand All @@ -22,12 +28,16 @@ struct command {
};

static const command cmds[] = {
{"serve", "HTTP API server", {"server"}, false, llama_server },
{"cli", "Command-line interactive interface", {"client"}, false, llama_cli },
{"completion", "Text completion", {"complete"}, true, llama_completion },
{"bench", "Benchmarking tool", {}, true, llama_bench },
{"version", "Show version", {}, true, version },
{"help", "Show available commands", {}, true, help },
{"serve", "HTTP API server", {"server"}, false, llama_server },
{"cli", "Command-line interactive interface", {"client"}, false, llama_cli },
{"completion", "Text completion", {"complete"}, true, llama_completion },
{"bench", "Benchmark prompt processing and text generation", {}, true, llama_bench },
{"batched-bench", "Benchmark batched decoding performance", {}, true, llama_batched_bench},
{"fit-params", "Compute parameters to fit a model in device memory", {}, true, llama_fit_params },
{"quantize", "Quantize a model", {}, true, llama_quantize },
{"perplexity", "Compute model perplexity and KL divergence", {}, true, llama_perplexity },
{"version", "Show version", {}, true, version },
{"help", "Show available commands", {}, true, help },
};

static int version(int argc, char ** argv) {
Expand Down
16 changes: 14 additions & 2 deletions tools/batched-bench/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,18 @@
# llama-batched-bench-impl: batched-bench logic, reusable by app

set(TARGET llama-batched-bench-impl)

# Static library holding the actual tool code (entry point: llama_batched_bench)
# so that more than one executable can link the same implementation.
add_library(${TARGET} STATIC batched-bench.cpp)

target_include_directories(${TARGET} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
target_link_libraries(${TARGET} PUBLIC llama-common llama ${CMAKE_THREAD_LIBS_INIT})
# batched-bench.cpp is compiled by this target rather than by the thin
# executable wrapper, so declare the C++17 requirement where the code is built.
target_compile_features(${TARGET} PRIVATE cxx_std_17)

# llama-batched-bench executable

set(TARGET llama-batched-bench)
add_executable(${TARGET} batched-bench.cpp)
target_link_libraries(${TARGET} PRIVATE llama-common llama ${CMAKE_THREAD_LIBS_INIT})

add_executable(${TARGET} main.cpp)
target_link_libraries(${TARGET} PRIVATE llama-batched-bench-impl)
target_compile_features(${TARGET} PRIVATE cxx_std_17)

if(LLAMA_TOOLS_INSTALL)
Expand Down
5 changes: 4 additions & 1 deletion tools/batched-bench/batched-bench.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,10 @@ static void print_usage(int, char ** argv) {
LOG("\n");
}

int main(int argc, char ** argv) {
// satisfies -Wmissing-declarations
int llama_batched_bench(int argc, char ** argv);

int llama_batched_bench(int argc, char ** argv) {
std::setlocale(LC_NUMERIC, "C");

common_params params;
Expand Down
5 changes: 5 additions & 0 deletions tools/batched-bench/main.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
// Implemented in batched-bench.cpp, which is built as llama-batched-bench-impl.
int llama_batched_bench(int argc, char ** argv);

// Standalone entry point: pass the command line through unchanged and use the
// tool's return value as the process exit status.
int main(int argc, char ** argv) {
    const int status = llama_batched_bench(argc, argv);
    return status;
}
16 changes: 14 additions & 2 deletions tools/fit-params/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,18 @@
# llama-fit-params-impl: fit-params logic, reusable by app

set(TARGET llama-fit-params-impl)

# Static library holding the actual tool code (entry point: llama_fit_params)
# so that more than one executable can link the same implementation.
add_library(${TARGET} STATIC fit-params.cpp)

target_include_directories(${TARGET} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
target_link_libraries(${TARGET} PUBLIC llama-common llama ${CMAKE_THREAD_LIBS_INIT})
# fit-params.cpp is compiled by this target rather than by the thin executable
# wrapper, so declare the C++17 requirement where the code is built.
target_compile_features(${TARGET} PRIVATE cxx_std_17)

# llama-fit-params executable

set(TARGET llama-fit-params)
add_executable(${TARGET} fit-params.cpp)
target_link_libraries(${TARGET} PRIVATE llama-common llama ${CMAKE_THREAD_LIBS_INIT})

add_executable(${TARGET} main.cpp)
target_link_libraries(${TARGET} PRIVATE llama-fit-params-impl)
target_compile_features(${TARGET} PRIVATE cxx_std_17)

if(LLAMA_TOOLS_INSTALL)
Expand Down
5 changes: 4 additions & 1 deletion tools/fit-params/fit-params.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,10 @@
#pragma warning(disable: 4244 4267) // possible loss of data
#endif

int main(int argc, char ** argv) {
// satisfies -Wmissing-declarations
int llama_fit_params(int argc, char ** argv);

int llama_fit_params(int argc, char ** argv) {
common_params params;

common_init();
Expand Down
5 changes: 5 additions & 0 deletions tools/fit-params/main.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
// Implemented in fit-params.cpp, which is built as llama-fit-params-impl.
int llama_fit_params(int argc, char ** argv);

// Standalone entry point: pass the command line through unchanged and use the
// tool's return value as the process exit status.
int main(int argc, char ** argv) {
    const int status = llama_fit_params(argc, argv);
    return status;
}
16 changes: 14 additions & 2 deletions tools/perplexity/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,18 @@
# llama-perplexity-impl: perplexity logic, reusable by app

set(TARGET llama-perplexity-impl)

# Static library holding the actual tool code (entry point: llama_perplexity)
# so that more than one executable can link the same implementation.
add_library(${TARGET} STATIC perplexity.cpp)

target_include_directories(${TARGET} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
target_link_libraries(${TARGET} PUBLIC llama-common llama ${CMAKE_THREAD_LIBS_INIT})
# perplexity.cpp is compiled by this target rather than by the thin executable
# wrapper, so declare the C++17 requirement where the code is built.
target_compile_features(${TARGET} PRIVATE cxx_std_17)

# llama-perplexity executable

set(TARGET llama-perplexity)
add_executable(${TARGET} perplexity.cpp)
target_link_libraries(${TARGET} PRIVATE llama-common llama ${CMAKE_THREAD_LIBS_INIT})

add_executable(${TARGET} main.cpp)
target_link_libraries(${TARGET} PRIVATE llama-perplexity-impl)
target_compile_features(${TARGET} PRIVATE cxx_std_17)

if(LLAMA_TOOLS_INSTALL)
Expand Down
5 changes: 5 additions & 0 deletions tools/perplexity/main.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
// Implemented in perplexity.cpp, which is built as llama-perplexity-impl.
int llama_perplexity(int argc, char ** argv);

// Standalone entry point: pass the command line through unchanged and use the
// tool's return value as the process exit status.
int main(int argc, char ** argv) {
    const int status = llama_perplexity(argc, argv);
    return status;
}
5 changes: 4 additions & 1 deletion tools/perplexity/perplexity.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2005,7 +2005,10 @@ static void kl_divergence(llama_context * ctx, const common_params & params) {
LOG("Same top p: %6.3lf ± %5.3lf %%\n", 100.0*same_top_p, 100.0*sqrt(same_top_p*(1.0 - same_top_p)/(kld.count - 1)));
}

int main(int argc, char ** argv) {
// satisfies -Wmissing-declarations
int llama_perplexity(int argc, char ** argv);

int llama_perplexity(int argc, char ** argv) {
std::setlocale(LC_NUMERIC, "C");

common_params params;
Expand Down
17 changes: 14 additions & 3 deletions tools/quantize/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,18 @@
# llama-quantize-impl: quantize logic, reusable by app

set(TARGET llama-quantize-impl)

# Static library holding the actual tool code (entry point: llama_quantize)
# so that more than one executable can link the same implementation.
add_library(${TARGET} STATIC quantize.cpp)

target_include_directories(${TARGET} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
target_link_libraries(${TARGET} PUBLIC llama-common llama ${CMAKE_THREAD_LIBS_INIT})
# quantize.cpp is compiled by this target rather than by the thin executable
# wrapper, so declare the C++17 requirement where the code is built.
target_compile_features(${TARGET} PRIVATE cxx_std_17)

# llama-quantize executable

set(TARGET llama-quantize)
add_executable(${TARGET} quantize.cpp)
target_link_libraries(${TARGET} PRIVATE llama-common llama ${CMAKE_THREAD_LIBS_INIT})
target_include_directories(${TARGET} PRIVATE ../../common)

add_executable(${TARGET} main.cpp)
target_link_libraries(${TARGET} PRIVATE llama-quantize-impl)
target_compile_features(${TARGET} PRIVATE cxx_std_17)

if(LLAMA_TOOLS_INSTALL)
Expand Down
5 changes: 5 additions & 0 deletions tools/quantize/main.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
// Implemented in quantize.cpp, which is built as llama-quantize-impl.
int llama_quantize(int argc, char ** argv);

// Standalone entry point: pass the command line through unchanged and use the
// tool's return value as the process exit status.
int main(int argc, char ** argv) {
    const int status = llama_quantize(argc, argv);
    return status;
}
5 changes: 4 additions & 1 deletion tools/quantize/quantize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -490,7 +490,10 @@ static bool parse_layer_prune(const char * data, std::vector<int> & prune_layers
return true;
}

int main(int argc, char ** argv) {
// satisfies -Wmissing-declarations
int llama_quantize(int argc, char ** argv);

int llama_quantize(int argc, char ** argv) {
std::setlocale(LC_NUMERIC, "C");
if (argc < 3) {
usage(argv[0]);
Expand Down
Loading