feat: bump llama.cpp to b3889
hans00 committed Oct 7, 2024
1 parent 8967d9b commit 165e5c0
Showing 6 changed files with 16 additions and 48 deletions.
9 changes: 0 additions & 9 deletions CMakeLists.txt
@@ -62,15 +62,6 @@ if (VULKAN_SDK)
find_package(Vulkan REQUIRED)
endif()

-find_program(PATCH patch REQUIRED)
-
-add_custom_target(
-  patch ALL
-  COMMAND ${PATCH} -p1 -N < ${CMAKE_SOURCE_DIR}/patches/llama.patch || true
-  WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/src/llama.cpp
-  COMMENT "Applying patches"
-)
-
set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build shared libraries")
add_subdirectory("src/llama.cpp")

22 changes: 0 additions & 22 deletions patches/llama.patch

This file was deleted.

12 changes: 6 additions & 6 deletions src/LlamaCompletionWorker.cpp
@@ -59,13 +59,13 @@ void LlamaCompletionWorker::Execute() {
size_t n_cur = 0;
size_t n_input = 0;
const auto model = _sess->model();
-const bool add_bos = llama_should_add_bos_token(model);
+const bool add_bos = llama_add_bos_token(model);
auto ctx = _sess->context();

-llama_set_rng_seed(ctx, _params.seed);
+auto sparams = llama_sampler_chain_default_params();

-LlamaCppSampling sampling{llama_sampling_init(_params.sparams),
-llama_sampling_free};
+LlamaCppSampling sampling{gpt_sampler_init(model, _params.sparams),
+gpt_sampler_free};

std::vector<llama_token> prompt_tokens =
::llama_tokenize(ctx, _params.prompt, add_bos);
@@ -109,8 +109,8 @@ void LlamaCompletionWorker::Execute() {
}
// sample the next token
const llama_token new_token_id =
-llama_sampling_sample(sampling.get(), ctx, nullptr);
-llama_sampling_accept(sampling.get(), ctx, new_token_id, true);
+gpt_sampler_sample(sampling.get(), ctx, -1);
+gpt_sampler_accept(sampling.get(), new_token_id, true);
// prepare the next batch
embd->emplace_back(new_token_id);
auto token = llama_token_to_piece(ctx, new_token_id);
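For context, the sketch below shows how a decode loop built on the b3889-era common sampling API fits together, using the same calls this diff switches to (gpt_sampler_init, gpt_sampler_sample, gpt_sampler_accept, gpt_sampler_free). It is a minimal illustration, not the worker's actual code: the helper name run_completion, the max_new_tokens parameter, and the use of llama_batch_get_one / llama_token_is_eog are assumptions about the surrounding llama.cpp API at this revision.

// Minimal sketch (not the project's code): a gpt_sampler-based decode loop
// against llama.cpp b3889-era common headers. Error handling is omitted and
// the batch helpers are assumed to match this revision's signatures.
#include "common/common.h"
#include "common/sampling.h"
#include "llama.h"
#include <vector>

static void run_completion(llama_model *model, llama_context *ctx,
                           gpt_params &params, int max_new_tokens) {
  // Build the sampler from the model and the sampling params, as in the diff.
  gpt_sampler *smpl = gpt_sampler_init(model, params.sparams);

  const bool add_bos = llama_add_bos_token(model);
  std::vector<llama_token> tokens = ::llama_tokenize(ctx, params.prompt, add_bos);

  // Evaluate the prompt in one batch.
  llama_decode(ctx, llama_batch_get_one(tokens.data(), (int32_t) tokens.size(), 0, 0));
  int n_past = (int) tokens.size();

  for (int i = 0; i < max_new_tokens; ++i) {
    // Sample from the last set of logits (idx = -1) and record the choice.
    llama_token id = gpt_sampler_sample(smpl, ctx, -1);
    gpt_sampler_accept(smpl, id, /* accept_grammar = */ true);
    if (llama_token_is_eog(model, id)) {
      break; // end-of-generation token
    }
    // Feed the sampled token back for the next step.
    llama_decode(ctx, llama_batch_get_one(&id, 1, n_past++, 0));
  }

  gpt_sampler_free(smpl);
}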
16 changes: 7 additions & 9 deletions src/LlamaContext.cpp
@@ -75,7 +75,7 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
params.embedding = get_option<bool>(options, "embedding", false);
params.n_ctx = get_option<int32_t>(options, "n_ctx", 512);
params.n_batch = get_option<int32_t>(options, "n_batch", 2048);
-params.n_threads =
+params.cpuparams.n_threads =
get_option<int32_t>(options, "n_threads", cpu_get_num_math() / 2);
params.n_gpu_layers = get_option<int32_t>(options, "n_gpu_layers", -1);
params.use_mlock = get_option<bool>(options, "use_mlock", false);
@@ -86,16 +86,14 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
llama_backend_init();
llama_numa_init(params.numa);

-llama_model *model;
-llama_context *ctx;
-std::tie(model, ctx) = llama_init_from_gpt_params(params);
+auto result = llama_init_from_gpt_params(params);

-if (model == nullptr || ctx == nullptr) {
+if (result.model == nullptr || result.context == nullptr) {
Napi::TypeError::New(env, "Failed to load model")
.ThrowAsJavaScriptException();
}

-_sess = std::make_shared<LlamaSession>(model, ctx, params);
+_sess = std::make_shared<LlamaSession>(result.model, result.context, params);
_info = gpt_params_get_system_info(params);
}

@@ -167,11 +165,11 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
params.sparams.penalty_present =
get_option<float>(options, "penalty_present", 0.00f);
params.sparams.penalize_nl = get_option<bool>(options, "penalize_nl", false);
-params.sparams.typical_p = get_option<float>(options, "typical_p", 1.00f);
-params.ignore_eos = get_option<float>(options, "ignore_eos", false);
+params.sparams.typ_p = get_option<float>(options, "typical_p", 1.00f);
+params.sparams.ignore_eos = get_option<float>(options, "ignore_eos", false);
params.sparams.grammar = get_option<std::string>(options, "grammar", "");
params.n_keep = get_option<int32_t>(options, "n_keep", 0);
-params.seed = get_option<int32_t>(options, "seed", LLAMA_DEFAULT_SEED);
+params.sparams.seed = get_option<int32_t>(options, "seed", LLAMA_DEFAULT_SEED);
std::vector<std::string> stop_words;
if (options.Has("stop") && options.Get("stop").IsArray()) {
auto stop_words_array = options.Get("stop").As<Napi::Array>();
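For context, a minimal sketch of the initialization flow after this change, assuming llama_init_from_gpt_params at b3889 returns a result struct with .model and .context members as used above; the model path and thread count are placeholder values.

// Minimal sketch (not the addon's code): loading a model with the b3889
// common API, where llama_init_from_gpt_params returns a struct instead of
// a pair, and n_threads / seed moved under cpuparams / sparams.
#include "common/common.h"
#include "llama.h"

int main() {
  gpt_params params;
  params.model = "model.gguf";              // placeholder path
  params.cpuparams.n_threads = 4;           // was params.n_threads
  params.sparams.seed = LLAMA_DEFAULT_SEED; // was params.seed

  llama_backend_init();
  llama_numa_init(params.numa);

  auto result = llama_init_from_gpt_params(params);
  if (result.model == nullptr || result.context == nullptr) {
    return 1; // failed to load the model or create the context
  }

  // ... run completions with result.model / result.context ...

  llama_free(result.context);
  llama_free_model(result.model);
  llama_backend_free();
  return 0;
}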
3 changes: 2 additions & 1 deletion src/common.hpp
@@ -1,6 +1,7 @@
#pragma once

#include "common/common.h"
#include "common/sampling.h"
#include "llama.h"
#include <memory>
#include <mutex>
@@ -12,7 +13,7 @@

typedef std::unique_ptr<llama_model, decltype(&llama_free_model)> LlamaCppModel;
typedef std::unique_ptr<llama_context, decltype(&llama_free)> LlamaCppContext;
-typedef std::unique_ptr<llama_sampling_context, decltype(&llama_sampling_free)>
+typedef std::unique_ptr<gpt_sampler, decltype(&gpt_sampler_free)>
LlamaCppSampling;
typedef std::unique_ptr<llama_batch, decltype(&llama_batch_free)> LlamaCppBatch;

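As a usage note, the sketch below shows how the updated LlamaCppSampling alias pairs gpt_sampler_init with gpt_sampler_free through std::unique_ptr; the make_sampler helper is purely illustrative, and the sampling-params type is assumed to be the gpt_sampler_params declared in common/sampling.h.

// Minimal sketch (illustrative only): the RAII alias from common.hpp ties a
// gpt_sampler's lifetime to scope, so gpt_sampler_free runs automatically.
#include "common/common.h"
#include "common/sampling.h"
#include "llama.h"
#include <memory>

typedef std::unique_ptr<gpt_sampler, decltype(&gpt_sampler_free)> LlamaCppSampling;

// Hypothetical helper: build a scoped sampler for an already-loaded model.
static LlamaCppSampling make_sampler(const llama_model *model,
                                     const gpt_sampler_params &sparams) {
  return LlamaCppSampling{gpt_sampler_init(model, sparams), gpt_sampler_free};
}
// When the returned wrapper goes out of scope, the custom deleter releases
// the sampler, matching how LlamaCompletionWorker holds `sampling` above.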
2 changes: 1 addition & 1 deletion src/llama.cpp
Submodule llama.cpp updated 337 files
