Skip to content

Commit

Permalink
Adapted LLM model to single .tgz file to download
Browse files Browse the repository at this point in the history
  • Loading branch information
iWas-Coder committed Oct 26, 2024
1 parent 3f7a7ba commit 5e1752b
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 20 deletions.
22 changes: 8 additions & 14 deletions ai/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ set(SPM_ENABLE_SHARED OFF CACHE BOOL "" FORCE)
# Force these cache options OFF before the dependencies are fetched.
# NOTE(review): presumably the test/example switches of gemma.cpp, Highway and
# Google Benchmark -- confirm against the CMake files of those projects.
set(GEMMA_ENABLE_TESTS OFF CACHE BOOL "" FORCE)
set(HWY_ENABLE_EXAMPLES OFF CACHE BOOL "" FORCE)
set(BENCHMARK_ENABLE_TESTING OFF CACHE BOOL "" FORCE)
# FORCE overwrites any existing CMAKE_BUILD_TYPE cache entry, so this directory
# always configures as Release regardless of what was set previously.
set(CMAKE_BUILD_TYPE "Release" CACHE STRING "" FORCE)

FetchContent_Declare(
gemma.cpp
Expand All @@ -25,22 +26,14 @@ FetchContent_Declare(
GIT_SHALLOW TRUE
)
FetchContent_Declare(
llm-model-weights
URL "https://huggingface.co/sparky-game/carbon/resolve/main/weights.sbs"
URL_HASH SHA256=4703b49c4e7177a949a5e60d91a5078f81e0d9ce80f0afa4ab4cb9af44fd334c
DOWNLOAD_DIR ${CMAKE_CURRENT_BINARY_DIR}
DOWNLOAD_NO_EXTRACT TRUE
)
FetchContent_Declare(
llm-model-tokenizer
URL "https://huggingface.co/sparky-game/carbon/resolve/main/tokenizer.spm"
URL_HASH SHA256=61a7b147390c64585d6c3543dd6fc636906c9af3865a5548f27f31aee1d4c8e2
DOWNLOAD_DIR ${CMAKE_CURRENT_BINARY_DIR}
DOWNLOAD_NO_EXTRACT TRUE
llm-model
URL "https://huggingface.co/sparky-game/carbon/resolve/main/model.tgz"
URL_HASH SHA256=6c42d35e1707910c5fd57104f85ff7e16022884deea7160dbe7c6c9c93431556
DOWNLOAD_DIR ${CMAKE_CURRENT_BINARY_DIR}
DOWNLOAD_EXTRACT_TIMESTAMP TRUE
)
FetchContent_MakeAvailable(gemma.cpp)
FetchContent_MakeAvailable(llm-model-weights)
FetchContent_MakeAvailable(llm-model-tokenizer)
FetchContent_MakeAvailable(llm-model)

set_target_properties(
gemma
Expand All @@ -55,4 +48,5 @@ set_target_properties(
target_compile_options(sentencepiece-static PRIVATE "-w")

# Build the `testgen` executable from testgen.cc, pass `-static` to the linker
# for it, and link it against the `libgemma` target.
add_executable(testgen testgen.cc)
target_link_options(testgen PRIVATE "-static")
target_link_libraries(testgen libgemma)
14 changes: 8 additions & 6 deletions ai/testgen.cc
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
// Invokes HWY_ABORT with a message of the form `<file>:<line> :: <err>`, where
// file and line are those of the macro's expansion site. `err` is formatted with
// `%s`, so it must be a C string.
#define CARBON_AI_ABORT_ERROR(err) HWY_ABORT("%s:%u :: %s", __FILE__, __LINE__, (err))
// Invokes HWY_ABORT with the command-line usage text. The expansion refers to
// `argv[0]`, so it can only be used where an `argv` array is in scope.
#define CARBON_AI_ABORT_USAGE HWY_ABORT("usage: %s [-t <N>] --src <FILE> --test <FILE>", argv[0])

// Directory prefix for the LLM model files (tokenizer and weights).
// This stays a macro rather than a constexpr because it is glued to file names
// through adjacent string-literal concatenation (`LLM_MODEL_PATH "tokenizer.spm"`).
// The trailing slash is required: without it the concatenation produces
// `...llm-model-srctokenizer.spm` instead of `...llm-model-src/tokenizer.spm`.
#define LLM_MODEL_PATH "build/_deps/llm-model-src/"

static constexpr auto system_prompt {
"Strictly follow the following instructions and rules:\n"
"- Just write the function that has been requested, no main, no examples, no nonsense.\n"
Expand All @@ -37,15 +39,15 @@ static constexpr auto request_prompt {
"Write an extended version of the unit test translation unit that includes additional unit tests that will increase the test coverage of the code under test.\n"
};

static inline std::string load_file_contents(const std::string &filepath) {
// Reads the whole file at `filepath` and returns its contents wrapped in a
// Markdown code fence: "```\n" + contents + "\n```\n".
//
// Throws std::runtime_error if the file cannot be opened for reading.
static std::string load_file_contents(const std::string &filepath) {
  std::ifstream ifs { filepath };
  if (not ifs) throw std::runtime_error { "unable to open file for reading (`" + filepath + "`)" };
  // Drain the file into its own stream. Inserting an empty streambuf sets
  // failbit on the destination stream, which would silently swallow anything
  // written afterwards (i.e. the closing fence). Reading via `str()` is not
  // affected by the stream state, so an empty file still yields both fences.
  std::stringstream contents;
  contents << ifs.rdbuf();
  return "```\n" + contents.str() + "\n```\n";
}

static inline std::vector<int> tokenize(const gcpp::GemmaTokenizer &tokenizer, const std::string &test_code, const std::string &src_code) {
static std::vector<int> tokenize(const gcpp::GemmaTokenizer &tokenizer, const std::string &test_code, const std::string &src_code) {
std::string prompt {
std::string(system_prompt) + "\n" +
test_code_prompt + "\n" +
Expand All @@ -60,7 +62,7 @@ static inline std::vector<int> tokenize(const gcpp::GemmaTokenizer &tokenizer, c
return tokens;
}

static inline void preprocess_output(std::stringstream &ss) {
static void preprocess_output(std::stringstream &ss) {
std::string line;
std::vector<std::string> lines;
while (std::getline(ss, line)) lines.emplace_back(line);
Expand All @@ -71,7 +73,7 @@ static inline void preprocess_output(std::stringstream &ss) {
}
}

static inline bool it_builds(const std::stringstream &ss) {
static bool it_builds(const std::stringstream &ss) {
std::ofstream ofs { "tmp.cc" };
if (not ofs) throw std::runtime_error { "unable to open/create file for writing (`./tmp.cc`)" };
ofs << ss.str();
Expand All @@ -84,8 +86,8 @@ static inline bool it_builds(const std::stringstream &ss) {

int main(int argc, char **argv) {
gcpp::LoaderArgs loader(argc, argv);
loader.tokenizer = "build/tokenizer.spm";
loader.weights = "build/weights.sbs";
loader.tokenizer = LLM_MODEL_PATH "tokenizer.spm";
loader.weights = LLM_MODEL_PATH "weights.sbs";
loader.model_type_str = "7b-it";
loader.weight_type_str = "sfp";

Expand Down

0 comments on commit 5e1752b

Please sign in to comment.