From 2d8e1da7474ef01497eabae26ec4a0624e6e2292 Mon Sep 17 00:00:00 2001 From: iWas-Coder Date: Thu, 18 Jul 2024 01:28:47 +0200 Subject: [PATCH 01/15] testgen: added necessary projects as submodules --- .gitmodules | 9 +++++++++ ai/vendor/gemma.cpp | 1 + ai/vendor/highway | 1 + ai/vendor/sentencepiece | 1 + 4 files changed, 12 insertions(+) create mode 100644 .gitmodules create mode 160000 ai/vendor/gemma.cpp create mode 160000 ai/vendor/highway create mode 160000 ai/vendor/sentencepiece diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..f43598f --- /dev/null +++ b/.gitmodules @@ -0,0 +1,9 @@ +[submodule "ai/vendor/highway"] + path = ai/vendor/highway + url = https://github.com/google/highway +[submodule "ai/vendor/sentencepiece"] + path = ai/vendor/sentencepiece + url = https://github.com/google/sentencepiece +[submodule "ai/vendor/gemma.cpp"] + path = ai/vendor/gemma.cpp + url = https://github.com/google/gemma.cpp diff --git a/ai/vendor/gemma.cpp b/ai/vendor/gemma.cpp new file mode 160000 index 0000000..960ff4b --- /dev/null +++ b/ai/vendor/gemma.cpp @@ -0,0 +1 @@ +Subproject commit 960ff4b4ec583d77ecad7a14c5149012240cc7e0 diff --git a/ai/vendor/highway b/ai/vendor/highway new file mode 160000 index 0000000..1cf089d --- /dev/null +++ b/ai/vendor/highway @@ -0,0 +1 @@ +Subproject commit 1cf089d07c8fb5531cea04b505ac8d152581f401 diff --git a/ai/vendor/sentencepiece b/ai/vendor/sentencepiece new file mode 160000 index 0000000..2de10cb --- /dev/null +++ b/ai/vendor/sentencepiece @@ -0,0 +1 @@ +Subproject commit 2de10cb30e982b980125d4713236dd2b29cc5f0c From 4674997c87ea0454c7d7d1aaab1ecd3e00d8d072 Mon Sep 17 00:00:00 2001 From: iWas-Coder Date: Thu, 18 Jul 2024 01:33:18 +0200 Subject: [PATCH 02/15] testgen: first iteration of the build system (*GNU make*) --- ai/GNUmakefile | 116 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 116 insertions(+) create mode 100644 ai/GNUmakefile diff --git a/ai/GNUmakefile b/ai/GNUmakefile new file mode 100644 index 0000000..3a00872 --- /dev/null +++ b/ai/GNUmakefile @@ -0,0 +1,116 @@ +PPO_MKDIR = MKDIR +PPO_CLEAN = CLEAN +PPO_CXX = CXX +PPO_AR = AR + +AR = ar -rcs +CXX = clang++ +OPTIMIZATIONS = -pipe -O3 + +HIGHWAY_HDR_DIR = vendor/highway +HIGHWAY_SRC_DIR = $(HIGHWAY_HDR_DIR)/hwy +HIGHWAY_BUILD_DIR = build/highway +HIGHWAY_SRCS := $(wildcard $(HIGHWAY_SRC_DIR)/contrib/sort/vqsort*.cc) $(addsuffix .cc, $(addprefix $(HIGHWAY_SRC_DIR)/, abort aligned_allocator nanobenchmark per_target print targets timer)) $(HIGHWAY_SRC_DIR)/contrib/thread_pool/topology.cc +HIGHWAY_OBJS := $(patsubst $(HIGHWAY_SRC_DIR)/%.cc, $(HIGHWAY_BUILD_DIR)/%.o, $(HIGHWAY_SRCS)) +HIGHWAY_OBJS := $(patsubst $(HIGHWAY_BUILD_DIR)/contrib/sort/%.o, $(HIGHWAY_BUILD_DIR)/%.o, $(HIGHWAY_OBJS)) +HIGHWAY_OBJS := $(patsubst $(HIGHWAY_BUILD_DIR)/contrib/thread_pool/%.o, $(HIGHWAY_BUILD_DIR)/%.o, $(HIGHWAY_OBJS)) +HIGHWAY_CPPFLAGS = -isystem $(HIGHWAY_HDR_DIR) +HIGHWAY_CXXFLAGS = -std=c++11 $(OPTIMIZATIONS) + +SENTENCEPIECE_HDR_DIR = vendor/sentencepiece +SENTENCEPIECE_SRC_DIR = $(SENTENCEPIECE_HDR_DIR)/src +SENTENCEPIECE_BUILD_DIR = build/sentencepiece +SENTENCEPIECE_SRCS := $(wildcard $(SENTENCEPIECE_HDR_DIR)/third_party/protobuf-lite/*.cc) $(SENTENCEPIECE_HDR_DIR)/third_party/absl/flags/flag.cc $(addsuffix .pb.cc, $(addprefix $(SENTENCEPIECE_SRC_DIR)/builtin_pb/, sentencepiece sentencepiece_model)) $(addsuffix .cc, $(addprefix $(SENTENCEPIECE_SRC_DIR)/, bpe_model char_model error filesystem model_factory model_interface normalizer sentencepiece_processor 
unigram_model util word_model)) +SENTENCEPIECE_OBJS := $(patsubst $(SENTENCEPIECE_SRC_DIR)/%.cc, $(SENTENCEPIECE_BUILD_DIR)/%.o, $(SENTENCEPIECE_SRCS)) +SENTENCEPIECE_OBJS := $(patsubst $(SENTENCEPIECE_BUILD_DIR)/builtin_pb/%.o, $(SENTENCEPIECE_BUILD_DIR)/%.o, $(SENTENCEPIECE_OBJS)) +SENTENCEPIECE_OBJS := $(patsubst $(SENTENCEPIECE_HDR_DIR)/third_party/protobuf-lite/%.cc, $(SENTENCEPIECE_BUILD_DIR)/%.o, $(SENTENCEPIECE_OBJS)) +SENTENCEPIECE_OBJS := $(patsubst $(SENTENCEPIECE_HDR_DIR)/third_party/absl/flags/%.cc, $(SENTENCEPIECE_BUILD_DIR)/%.o, $(SENTENCEPIECE_OBJS)) +SENTENCEPIECE_CPPFLAGS = -D HAVE_PTHREAD -isystem $(SENTENCEPIECE_HDR_DIR) -isystem $(SENTENCEPIECE_SRC_DIR)/builtin_pb -isystem $(SENTENCEPIECE_HDR_DIR)/third_party/protobuf-lite +SENTENCEPIECE_CXXFLAGS = -std=c++17 $(OPTIMIZATIONS) + +GEMMACPP_HDR_DIR = vendor/gemma.cpp +GEMMACPP_SRC_DIR = $(GEMMACPP_HDR_DIR) +GEMMACPP_BUILD_DIR = build/gemma.cpp +GEMMACPP_SRCS := $(addsuffix .cc, $(addprefix $(GEMMACPP_SRC_DIR)/backprop/, backward forward optimizer)) $(addsuffix .cc, $(addprefix $(GEMMACPP_SRC_DIR)/compression/, blob_store io_win io)) $(addsuffix .cc, $(addprefix $(GEMMACPP_SRC_DIR)/evals/, benchmark_helper cross_entropy)) $(wildcard $(GEMMACPP_SRC_DIR)/gemma/instantiations/*.cc) $(addsuffix .cc, $(addprefix $(GEMMACPP_SRC_DIR)/gemma/, common gemma kv_cache tokenizer weights)) +GEMMACPP_OBJS := $(patsubst $(GEMMACPP_SRC_DIR)/gemma/%.cc, $(GEMMACPP_BUILD_DIR)/%.o, $(GEMMACPP_SRCS)) +GEMMACPP_OBJS := $(patsubst $(GEMMACPP_BUILD_DIR)/instantiations/%.o, $(GEMMACPP_BUILD_DIR)/%.o, $(GEMMACPP_OBJS)) +GEMMACPP_OBJS := $(patsubst $(GEMMACPP_SRC_DIR)/evals/%.cc, $(GEMMACPP_BUILD_DIR)/%.o, $(GEMMACPP_OBJS)) +GEMMACPP_OBJS := $(patsubst $(GEMMACPP_SRC_DIR)/backprop/%.cc, $(GEMMACPP_BUILD_DIR)/%.o, $(GEMMACPP_OBJS)) +GEMMACPP_OBJS := $(patsubst $(GEMMACPP_SRC_DIR)/compression/%.cc, $(GEMMACPP_BUILD_DIR)/%.o, $(GEMMACPP_OBJS)) +GEMMACPP_CPPFLAGS = -isystem $(GEMMACPP_HDR_DIR) -isystem $(HIGHWAY_HDR_DIR) -isystem $(SENTENCEPIECE_HDR_DIR) +GEMMACPP_CXXFLAGS = -std=c++17 $(OPTIMIZATIONS) + +DIRS_OUT = $(HIGHWAY_BUILD_DIR) $(SENTENCEPIECE_BUILD_DIR) $(GEMMACPP_BUILD_DIR) + +.PHONY: all clean mrproper + +all: $(DIRS_OUT) libgemma.a + @: + +libgemma.a: $(HIGHWAY_OBJS) $(SENTENCEPIECE_OBJS) $(GEMMACPP_OBJS) + @echo " $(PPO_AR) $@" + @$(AR) $@ $^ + +$(DIRS_OUT): + @echo " $(PPO_MKDIR) $@" + @mkdir -p $@ + +$(HIGHWAY_BUILD_DIR)/%.o: $(HIGHWAY_SRC_DIR)/%.cc + @echo " $(PPO_CXX) $@" + @$(CXX) $(HIGHWAY_CPPFLAGS) $(HIGHWAY_CXXFLAGS) -c -MD $< -o $@ + +$(HIGHWAY_BUILD_DIR)/%.o: $(HIGHWAY_SRC_DIR)/contrib/sort/%.cc + @echo " $(PPO_CXX) $@" + @$(CXX) $(HIGHWAY_CPPFLAGS) $(HIGHWAY_CXXFLAGS) -c -MD $< -o $@ + +$(HIGHWAY_BUILD_DIR)/%.o: $(HIGHWAY_SRC_DIR)/contrib/thread_pool/%.cc + @echo " $(PPO_CXX) $@" + @$(CXX) $(HIGHWAY_CPPFLAGS) $(HIGHWAY_CXXFLAGS) -c -MD $< -o $@ + +$(SENTENCEPIECE_BUILD_DIR)/%.o: $(SENTENCEPIECE_SRC_DIR)/%.cc + @echo " $(PPO_CXX) $@" + @$(CXX) $(SENTENCEPIECE_CPPFLAGS) $(SENTENCEPIECE_CXXFLAGS) -c -MD $< -o $@ + +$(SENTENCEPIECE_BUILD_DIR)/%.o: $(SENTENCEPIECE_SRC_DIR)/builtin_pb/%.cc + @echo " $(PPO_CXX) $@" + @$(CXX) $(SENTENCEPIECE_CPPFLAGS) $(SENTENCEPIECE_CXXFLAGS) -c -MD $< -o $@ + +$(SENTENCEPIECE_BUILD_DIR)/%.o: $(SENTENCEPIECE_HDR_DIR)/third_party/protobuf-lite/%.cc + @echo " $(PPO_CXX) $@" + @$(CXX) $(SENTENCEPIECE_CPPFLAGS) $(SENTENCEPIECE_CXXFLAGS) -c -MD $< -o $@ + +$(SENTENCEPIECE_BUILD_DIR)/%.o: $(SENTENCEPIECE_HDR_DIR)/third_party/absl/flags/%.cc + @echo " $(PPO_CXX) $@" + @[ ! 
-e $(SENTENCEPIECE_HDR_DIR)/config.h ] && cp $(SENTENCEPIECE_HDR_DIR)/config.h.in $(SENTENCEPIECE_HDR_DIR)/config.h || true + @sed -i -e 's/@PROJECT_VERSION@/0.2.0/' -e 's/@PROJECT_NAME@/sentencepiece/' $(SENTENCEPIECE_HDR_DIR)/config.h + @$(CXX) $(SENTENCEPIECE_CPPFLAGS) $(SENTENCEPIECE_CXXFLAGS) -c -MD $< -o $@ + +$(GEMMACPP_BUILD_DIR)/%.o: $(GEMMACPP_SRC_DIR)/gemma/%.cc + @echo " $(PPO_CXX) $@" + @$(CXX) $(GEMMACPP_CPPFLAGS) $(GEMMACPP_CXXFLAGS) -c -MD $< -o $@ + +$(GEMMACPP_BUILD_DIR)/%.o: $(GEMMACPP_SRC_DIR)/gemma/instantiations/%.cc + @echo " $(PPO_CXX) $@" + @$(CXX) $(GEMMACPP_CPPFLAGS) $(GEMMACPP_CXXFLAGS) -c -MD $< -o $@ + +$(GEMMACPP_BUILD_DIR)/%.o: $(GEMMACPP_SRC_DIR)/evals/%.cc + @echo " $(PPO_CXX) $@" + @$(CXX) $(GEMMACPP_CPPFLAGS) $(GEMMACPP_CXXFLAGS) -c -MD $< -o $@ + +$(GEMMACPP_BUILD_DIR)/%.o: $(GEMMACPP_SRC_DIR)/backprop/%.cc + @echo " $(PPO_CXX) $@" + @$(CXX) $(GEMMACPP_CPPFLAGS) $(GEMMACPP_CXXFLAGS) -c -MD $< -o $@ + +$(GEMMACPP_BUILD_DIR)/%.o: $(GEMMACPP_SRC_DIR)/compression/%.cc + @echo " $(PPO_CXX) $@" + @$(CXX) $(GEMMACPP_CPPFLAGS) $(GEMMACPP_CXXFLAGS) -c -MD $< -o $@ + +-include $(HIGHWAY_BUILD_DIR)/*.d +-include $(GEMMACPP_BUILD_DIR)/*.d +-include $(SENTENCEPIECE_BUILD_DIR)/*.d + +clean: + @if [ -d build ]; then \ + echo " $(PPO_CLEAN) build"; \ + rm -r build; \ + fi From ade7273610c1275faf6a827b07f636333da43884 Mon Sep 17 00:00:00 2001 From: iWas-Coder Date: Thu, 18 Jul 2024 09:46:19 +0200 Subject: [PATCH 03/15] testgen: added model weights and tokenizer (*CodeGemma 7B Instruction-tuned SFP*) --- .gitattributes | 4 ++++ ai/model/tokenizer.spm | 3 +++ ai/model/weights.sbs.00 | 3 +++ ai/model/weights.sbs.01 | 3 +++ ai/model/weights.sbs.02 | 3 +++ ai/model/weights.sbs.03 | 3 +++ ai/model/weights.sbs.04 | 3 +++ ai/model/weights.sbs.05 | 3 +++ ai/model/weights.sbs.06 | 3 +++ ai/model/weights.sbs.07 | 3 +++ ai/model/weights.sbs.08 | 3 +++ ai/model/weights.sbs.09 | 3 +++ ai/model/weights.sbs.10 | 3 +++ ai/model/weights.sbs.11 | 3 +++ ai/model/weights.sbs.12 | 3 +++ ai/model/weights.sbs.13 | 3 +++ ai/model/weights.sbs.14 | 3 +++ ai/model/weights.sbs.15 | 3 +++ ai/model/weights.sbs.16 | 3 +++ ai/model/weights.sbs.17 | 3 +++ 20 files changed, 61 insertions(+) create mode 100644 ai/model/tokenizer.spm create mode 100644 ai/model/weights.sbs.00 create mode 100644 ai/model/weights.sbs.01 create mode 100644 ai/model/weights.sbs.02 create mode 100644 ai/model/weights.sbs.03 create mode 100644 ai/model/weights.sbs.04 create mode 100644 ai/model/weights.sbs.05 create mode 100644 ai/model/weights.sbs.06 create mode 100644 ai/model/weights.sbs.07 create mode 100644 ai/model/weights.sbs.08 create mode 100644 ai/model/weights.sbs.09 create mode 100644 ai/model/weights.sbs.10 create mode 100644 ai/model/weights.sbs.11 create mode 100644 ai/model/weights.sbs.12 create mode 100644 ai/model/weights.sbs.13 create mode 100644 ai/model/weights.sbs.14 create mode 100644 ai/model/weights.sbs.15 create mode 100644 ai/model/weights.sbs.16 create mode 100644 ai/model/weights.sbs.17 diff --git a/.gitattributes b/.gitattributes index 90eaec1..93a218d 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,3 +1,7 @@ * text eol=lf *.png binary + +*.spm filter=lfs diff=lfs merge=lfs -text +*.sbs filter=lfs diff=lfs merge=lfs -text +*.sbs.* filter=lfs diff=lfs merge=lfs -text diff --git a/ai/model/tokenizer.spm b/ai/model/tokenizer.spm new file mode 100644 index 0000000..71a98ce --- /dev/null +++ b/ai/model/tokenizer.spm @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:583f2ebd2a1936009b7da991ea255504db68c7a9713a78673d1335a87098966c +size 4241023 diff --git a/ai/model/weights.sbs.00 b/ai/model/weights.sbs.00 new file mode 100644 index 0000000..aa8c017 --- /dev/null +++ b/ai/model/weights.sbs.00 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aef14c4d17f7a2624b4bc9d128cad7490dbd11637ecd914273952695df69d776 +size 524288000 diff --git a/ai/model/weights.sbs.01 b/ai/model/weights.sbs.01 new file mode 100644 index 0000000..0a89033 --- /dev/null +++ b/ai/model/weights.sbs.01 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1435edceed0b761336abd0580ba3db7377b140c65c5772f7fec813287d8d20bd +size 524288000 diff --git a/ai/model/weights.sbs.02 b/ai/model/weights.sbs.02 new file mode 100644 index 0000000..e714d48 --- /dev/null +++ b/ai/model/weights.sbs.02 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a88ff63ce07b3aede351fb4fb29b124536bad59de804d73085602a3ec878617 +size 524288000 diff --git a/ai/model/weights.sbs.03 b/ai/model/weights.sbs.03 new file mode 100644 index 0000000..b9a7f56 --- /dev/null +++ b/ai/model/weights.sbs.03 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c81c85b21fc4ccd6ed2fa1d6a7d7de560c98a26246d756fa39806589dc4ad1ea +size 524288000 diff --git a/ai/model/weights.sbs.04 b/ai/model/weights.sbs.04 new file mode 100644 index 0000000..a017fc3 --- /dev/null +++ b/ai/model/weights.sbs.04 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:861c637fdfeec40e862b66b68ad82f7ef53738efb6aa0848b1f280e4ccaaba57 +size 524288000 diff --git a/ai/model/weights.sbs.05 b/ai/model/weights.sbs.05 new file mode 100644 index 0000000..1ecf4b3 --- /dev/null +++ b/ai/model/weights.sbs.05 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5da28afa300e3f8ff6f0a39b991ce77a5b2ba7b67a0e089a9cef40aaedf1361b +size 524288000 diff --git a/ai/model/weights.sbs.06 b/ai/model/weights.sbs.06 new file mode 100644 index 0000000..647f495 --- /dev/null +++ b/ai/model/weights.sbs.06 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d266bdff5640ca741b7f0c673dc0e759ca757883b07a0e12c44cff0a02bdbdb +size 524288000 diff --git a/ai/model/weights.sbs.07 b/ai/model/weights.sbs.07 new file mode 100644 index 0000000..085744a --- /dev/null +++ b/ai/model/weights.sbs.07 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74865349dd89392c20fc97026637bd02c2c39b32d83d6a1839492925a320c83c +size 524288000 diff --git a/ai/model/weights.sbs.08 b/ai/model/weights.sbs.08 new file mode 100644 index 0000000..0b25406 --- /dev/null +++ b/ai/model/weights.sbs.08 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ed73e823bd892896f6b912717d7d8c0ab8faf402392f8e0672255e0635e47b7 +size 524288000 diff --git a/ai/model/weights.sbs.09 b/ai/model/weights.sbs.09 new file mode 100644 index 0000000..d757253 --- /dev/null +++ b/ai/model/weights.sbs.09 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b138e2db6378e627cfee8a3ff8290a07a79656209aeb8e61a681a21a83aaef04 +size 524288000 diff --git a/ai/model/weights.sbs.10 b/ai/model/weights.sbs.10 new file mode 100644 index 0000000..973c86b --- /dev/null +++ b/ai/model/weights.sbs.10 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62abe21d7197b79b878ad5d41830e1a1441f5e4f9edca6d76fc1d6eb855c252a +size 524288000 diff --git a/ai/model/weights.sbs.11 b/ai/model/weights.sbs.11 new file mode 100644 index 0000000..acef71e --- /dev/null +++ 
b/ai/model/weights.sbs.11 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d84b1afa6954fd1566830cdaffa209917d5449280a91a369129340a75e0c3e0 +size 524288000 diff --git a/ai/model/weights.sbs.12 b/ai/model/weights.sbs.12 new file mode 100644 index 0000000..75f6111 --- /dev/null +++ b/ai/model/weights.sbs.12 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c4055ce7362615498eccd0bf43595a59959f5db55184a8ffe96217cd8290597 +size 524288000 diff --git a/ai/model/weights.sbs.13 b/ai/model/weights.sbs.13 new file mode 100644 index 0000000..bb4c2a5 --- /dev/null +++ b/ai/model/weights.sbs.13 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51a05088b425c07c087ae82081771757c290385846f3f3db0d59596cbd089ccf +size 524288000 diff --git a/ai/model/weights.sbs.14 b/ai/model/weights.sbs.14 new file mode 100644 index 0000000..7e3fe27 --- /dev/null +++ b/ai/model/weights.sbs.14 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19aaf935a8ade22bbef604ff46b6ef179dd031d61c2fd5ab996e14d8d2d10aaa +size 524288000 diff --git a/ai/model/weights.sbs.15 b/ai/model/weights.sbs.15 new file mode 100644 index 0000000..4c5c94a --- /dev/null +++ b/ai/model/weights.sbs.15 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d637d17c6202583fa0499206054d5fcc38e537e00d755fa007d49c0dd676d6d2 +size 524288000 diff --git a/ai/model/weights.sbs.16 b/ai/model/weights.sbs.16 new file mode 100644 index 0000000..f20f37f --- /dev/null +++ b/ai/model/weights.sbs.16 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb41a489a3e1d89f3b52fe2fbca4b4d81bee5390e310d4e2a7a1df97c4033ae1 +size 524288000 diff --git a/ai/model/weights.sbs.17 b/ai/model/weights.sbs.17 new file mode 100644 index 0000000..788e26a --- /dev/null +++ b/ai/model/weights.sbs.17 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e94759b4f2d72f1a706fe33a39bbdad2d1b3afc95163acfe17581a04b3c69c3 +size 411397632 From 49c8b76f01200864e06605aa9784f019a1e7ac1b Mon Sep 17 00:00:00 2001 From: iWas-Coder Date: Thu, 18 Jul 2024 18:09:06 +0200 Subject: [PATCH 04/15] Basic code generation with LLM from C++ --- .gitattributes | 1 - .gitignore | 3 ++ ai/GNUmakefile | 52 +++++++++++++++++++++++-------- ai/testgen.cc | 84 ++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 127 insertions(+), 13 deletions(-) create mode 100644 ai/testgen.cc diff --git a/.gitattributes b/.gitattributes index 93a218d..d8b45ad 100644 --- a/.gitattributes +++ b/.gitattributes @@ -3,5 +3,4 @@ *.png binary *.spm filter=lfs diff=lfs merge=lfs -text -*.sbs filter=lfs diff=lfs merge=lfs -text *.sbs.* filter=lfs diff=lfs merge=lfs -text diff --git a/.gitignore b/.gitignore index 95afccc..9cf433c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,5 @@ +ai/build/ + *.xml test/carbon +ai/testgen diff --git a/ai/GNUmakefile b/ai/GNUmakefile index 3a00872..a5cabe3 100644 --- a/ai/GNUmakefile +++ b/ai/GNUmakefile @@ -1,9 +1,9 @@ PPO_MKDIR = MKDIR PPO_CLEAN = CLEAN +PPO_MERGE = MERGE PPO_CXX = CXX -PPO_AR = AR +PPO_LD = LD -AR = ar -rcs CXX = clang++ OPTIMIZATIONS = -pipe -O3 @@ -27,6 +27,7 @@ SENTENCEPIECE_OBJS := $(patsubst $(SENTENCEPIECE_HDR_DIR)/third_party/protob SENTENCEPIECE_OBJS := $(patsubst $(SENTENCEPIECE_HDR_DIR)/third_party/absl/flags/%.cc, $(SENTENCEPIECE_BUILD_DIR)/%.o, $(SENTENCEPIECE_OBJS)) SENTENCEPIECE_CPPFLAGS = -D HAVE_PTHREAD -isystem $(SENTENCEPIECE_HDR_DIR) -isystem $(SENTENCEPIECE_SRC_DIR)/builtin_pb -isystem 
$(SENTENCEPIECE_HDR_DIR)/third_party/protobuf-lite SENTENCEPIECE_CXXFLAGS = -std=c++17 $(OPTIMIZATIONS) +SENTENCEPIECE_VERSION := $(shell cat $(SENTENCEPIECE_HDR_DIR)/VERSION.txt) GEMMACPP_HDR_DIR = vendor/gemma.cpp GEMMACPP_SRC_DIR = $(GEMMACPP_HDR_DIR) @@ -40,21 +41,37 @@ GEMMACPP_OBJS := $(patsubst $(GEMMACPP_SRC_DIR)/compression/%.cc, $(GEMMACPP GEMMACPP_CPPFLAGS = -isystem $(GEMMACPP_HDR_DIR) -isystem $(HIGHWAY_HDR_DIR) -isystem $(SENTENCEPIECE_HDR_DIR) GEMMACPP_CXXFLAGS = -std=c++17 $(OPTIMIZATIONS) -DIRS_OUT = $(HIGHWAY_BUILD_DIR) $(SENTENCEPIECE_BUILD_DIR) $(GEMMACPP_BUILD_DIR) +TESTGEN_BUILD_DIR = build +TESTGEN_WEIGHTS_IN := $(wildcard model/weights.sbs.*) +TESTGEN_WEIGHTS_OUT = $(TESTGEN_BUILD_DIR)/weights.sbs +TESTGEN_SRCS = testgen.cc +TESTGEN_OBJS = $(patsubst %.cc, $(TESTGEN_BUILD_DIR)/%.o, $(TESTGEN_SRCS)) +TESTGEN_CPPFLAGS = -isystem $(HIGHWAY_HDR_DIR) -isystem $(GEMMACPP_HDR_DIR) +TESTGEN_CXXFLAGS = -std=c++20 -Wall -Wextra -Wpedantic -Werror $(OPTIMIZATIONS) +TESTGEN_LDFLAGS = -static $(OPTIMIZATIONS) +TESTGEN_OUT = testgen + +DIRS_OUT = $(TESTGEN_BUILD_DIR) $(HIGHWAY_BUILD_DIR) $(SENTENCEPIECE_BUILD_DIR) $(GEMMACPP_BUILD_DIR) .PHONY: all clean mrproper -all: $(DIRS_OUT) libgemma.a +all: $(DIRS_OUT) $(TESTGEN_WEIGHTS_OUT) $(TESTGEN_OUT) @: -libgemma.a: $(HIGHWAY_OBJS) $(SENTENCEPIECE_OBJS) $(GEMMACPP_OBJS) - @echo " $(PPO_AR) $@" - @$(AR) $@ $^ - $(DIRS_OUT): @echo " $(PPO_MKDIR) $@" @mkdir -p $@ +$(TESTGEN_WEIGHTS_OUT): $(TESTGEN_WEIGHTS_IN) + @for i in $^; do \ + echo " $(PPO_MERGE) $$i >> $@"; \ + cat $$i >> $@; \ + done + +$(TESTGEN_OUT): $(HIGHWAY_OBJS) $(SENTENCEPIECE_OBJS) $(GEMMACPP_OBJS) $(TESTGEN_OBJS) + @echo " $(PPO_LD) $@" + @$(CXX) $^ $(TESTGEN_LDFLAGS) -o $@ + $(HIGHWAY_BUILD_DIR)/%.o: $(HIGHWAY_SRC_DIR)/%.cc @echo " $(PPO_CXX) $@" @$(CXX) $(HIGHWAY_CPPFLAGS) $(HIGHWAY_CXXFLAGS) -c -MD $< -o $@ @@ -82,7 +99,7 @@ $(SENTENCEPIECE_BUILD_DIR)/%.o: $(SENTENCEPIECE_HDR_DIR)/third_party/protobuf-li $(SENTENCEPIECE_BUILD_DIR)/%.o: $(SENTENCEPIECE_HDR_DIR)/third_party/absl/flags/%.cc @echo " $(PPO_CXX) $@" @[ ! 
-e $(SENTENCEPIECE_HDR_DIR)/config.h ] && cp $(SENTENCEPIECE_HDR_DIR)/config.h.in $(SENTENCEPIECE_HDR_DIR)/config.h || true - @sed -i -e 's/@PROJECT_VERSION@/0.2.0/' -e 's/@PROJECT_NAME@/sentencepiece/' $(SENTENCEPIECE_HDR_DIR)/config.h + @sed -i -e 's/@PROJECT_VERSION@/$(SENTENCEPIECE_VERSION)/' -e 's/@PROJECT_NAME@/sentencepiece/' $(SENTENCEPIECE_HDR_DIR)/config.h @$(CXX) $(SENTENCEPIECE_CPPFLAGS) $(SENTENCEPIECE_CXXFLAGS) -c -MD $< -o $@ $(GEMMACPP_BUILD_DIR)/%.o: $(GEMMACPP_SRC_DIR)/gemma/%.cc @@ -105,12 +122,23 @@ $(GEMMACPP_BUILD_DIR)/%.o: $(GEMMACPP_SRC_DIR)/compression/%.cc @echo " $(PPO_CXX) $@" @$(CXX) $(GEMMACPP_CPPFLAGS) $(GEMMACPP_CXXFLAGS) -c -MD $< -o $@ +$(TESTGEN_BUILD_DIR)/%.o: %.cc + @echo " $(PPO_CXX) $@" + @$(CXX) $(TESTGEN_CPPFLAGS) $(TESTGEN_CXXFLAGS) -c -MD $< -o $@ + +-include $(TESTGEN_BUILD_DIR)/*.d -include $(HIGHWAY_BUILD_DIR)/*.d -include $(GEMMACPP_BUILD_DIR)/*.d -include $(SENTENCEPIECE_BUILD_DIR)/*.d clean: - @if [ -d build ]; then \ - echo " $(PPO_CLEAN) build"; \ - rm -r build; \ + @if [ -d $(TESTGEN_BUILD_DIR) ]; then \ + echo " $(PPO_CLEAN) $(TESTGEN_BUILD_DIR)"; \ + rm -r $(TESTGEN_BUILD_DIR); \ + fi + +mrproper: clean + @if [ -e testgen ]; then \ + echo " $(PPO_CLEAN) testgen"; \ + rm testgen; \ fi diff --git a/ai/testgen.cc b/ai/testgen.cc new file mode 100644 index 0000000..63c82ab --- /dev/null +++ b/ai/testgen.cc @@ -0,0 +1,84 @@ +#include +#include "vendor/gemma.cpp/util/app.h" +#include "vendor/gemma.cpp/util/args.h" +#include "vendor/gemma.cpp/gemma/gemma.h" +#include "vendor/gemma.cpp/evals/benchmark_helper.h" + +static constexpr auto system_prompt { + "Just write the function that has been requested, no main, no examples, no nonsense." + "When finished writing the function, do not repeat it, just write it once." + "Do not explain anything, just write the code requested." + "Do not use any markdown formatting at all, just plain text." + "Only write the plain text code with no additional formatting." + "Always use 2 space indenting, no tabs." + "Taking into consideration all of the instructions above, do what is requested next:" +}; + +std::vector tokenize(const std::string &prompt, const gcpp::GemmaTokenizer &tokenizer) { + std::string ctx { std::string(system_prompt) + "\n" + prompt + "\n" }; + std::vector tokens; + HWY_ASSERT(tokenizer.Encode(ctx, &tokens)); + tokens.insert(tokens.begin(), gcpp::BOS_ID); + return tokens; +} + +int main(int argc, char **argv) { + gcpp::LoaderArgs loader(argc, argv); + loader.tokenizer = "model/tokenizer.spm"; + loader.weights = "build/weights.sbs"; + loader.model_type_str = "7b-it"; + loader.weight_type_str = "sfp"; + + gcpp::InferenceArgs inference(argc, argv); + inference.max_tokens = gcpp::kSeqLen; + inference.max_generated_tokens = 1024; + inference.temperature = 0.2f; + inference.deterministic = false; + inference.multiturn = false; + + gcpp::AppArgs app(argc, argv); + app.num_threads = 2; + + if (const char* err { loader.Validate() }) HWY_ABORT("%s", err); + if (const char* err { inference.Validate() }) HWY_ABORT("%s", err); + + hwy::ThreadPool pool(app.num_threads); + if (app.num_threads > 10) gcpp::PinWorkersToCores(pool); + gcpp::Gemma model { gcpp::CreateGemma(loader, pool) }; + gcpp::KVCache kv_cache { gcpp::KVCache::Create(model.Info().model) }; + gcpp::TimingInfo timings; + std::random_device rand_dev; + std::mt19937 rand_gen { rand_dev() }; + + constexpr auto user_prompt { + "Write a function in C++ which checks whether a number is prime." 
+ }; + std::vector prompt { tokenize(user_prompt, model.Tokenizer()) }; + size_t prompt_size { prompt.size() }; + + size_t pos {0}; + auto stream_token = [prompt_size, &pos, &model](int token, float) { + ++pos; + if (pos > prompt_size and token != gcpp::EOS_ID) { + std::string tok; + HWY_ASSERT(model.Tokenizer().Decode(std::vector{token}, &tok)); + std::cout << tok << std::flush; + } + return true; + }; + + gcpp::RuntimeConfig runtime_conf { + .max_tokens = inference.max_tokens, + .max_generated_tokens = inference.max_generated_tokens, + .temperature = inference.temperature, + .gen = &rand_gen, + .stream_token = stream_token + }; + + model.Generate(runtime_conf, prompt, 0, kv_cache, timings); + + std::cout << "\n\nStats:" << std::endl; + std::cout << " prefill_tok_sec: " << timings.prefill_tok_sec << std::endl; + std::cout << " gen_tok_sec: " << timings.gen_tok_sec << std::endl; + std::cout << " time_to_first_token: " << timings.time_to_first_token << std::endl; +} From a84c0a748ce62d893e317107d4fcb56ededfb929 Mon Sep 17 00:00:00 2001 From: iWas-Coder Date: Thu, 18 Jul 2024 22:13:13 +0200 Subject: [PATCH 05/15] Pre-Process and check if it builds --- ai/testgen.cc | 67 ++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 53 insertions(+), 14 deletions(-) diff --git a/ai/testgen.cc b/ai/testgen.cc index 63c82ab..de83d10 100644 --- a/ai/testgen.cc +++ b/ai/testgen.cc @@ -1,3 +1,5 @@ +#include +#include #include #include "vendor/gemma.cpp/util/app.h" #include "vendor/gemma.cpp/util/args.h" @@ -5,16 +7,23 @@ #include "vendor/gemma.cpp/evals/benchmark_helper.h" static constexpr auto system_prompt { - "Just write the function that has been requested, no main, no examples, no nonsense." - "When finished writing the function, do not repeat it, just write it once." - "Do not explain anything, just write the code requested." - "Do not use any markdown formatting at all, just plain text." - "Only write the plain text code with no additional formatting." - "Always use 2 space indenting, no tabs." - "Taking into consideration all of the instructions above, do what is requested next:" + "Strictly follow the following instructions and rules:" + "- Just write the function that has been requested, no main, no examples, no nonsense." + "- When finished writing the function, do not repeat it, just write it once." + "- Do not explain anything, just write the code requested." + "- Do not use any markdown formatting at all, just plain text." + "- Only write the plain text code with no additional formatting." + "- Always use 2 space indenting, no tabs." + "- Do not write multiple blocks of code, just one." + "- Do not use any third-party dependency, just built-in features." + "Taking into consideration all of the instructions above, perform the following order as strictly as possible:" }; -std::vector tokenize(const std::string &prompt, const gcpp::GemmaTokenizer &tokenizer) { +static constexpr auto user_prompt { + "C++. We have a function that computes GCD of pair of numbers. Write a unit test for it." 
+}; + +static inline std::vector tokenize(const std::string &prompt, const gcpp::GemmaTokenizer &tokenizer) { std::string ctx { std::string(system_prompt) + "\n" + prompt + "\n" }; std::vector tokens; HWY_ASSERT(tokenizer.Encode(ctx, &tokens)); @@ -22,6 +31,28 @@ std::vector tokenize(const std::string &prompt, const gcpp::GemmaTokenizer return tokens; } +static inline void preprocess_output(std::stringstream &ss) { + std::string line; + std::vector lines; + while (std::getline(ss, line)) lines.emplace_back(line); + ss.str(""); + ss.clear(); + for (const auto &i : lines) { + if (i.find("```") == std::string::npos) ss << i << std::endl; + } +} + +static inline bool it_builds(const std::stringstream &ss) { + std::ofstream ofs { "tmp.cc" }; + if (not ofs) throw std::runtime_error { "unable to open/create file for writing (`./tmp.cc`)" }; + ofs << ss.str(); + ofs.close(); + int result { std::system("clang++ -S tmp.cc -o /dev/null >/dev/null 2>&1") }; + std::remove("tmp.cc"); + if (result != 0) return false; + return true; +} + int main(int argc, char **argv) { gcpp::LoaderArgs loader(argc, argv); loader.tokenizer = "model/tokenizer.spm"; @@ -37,7 +68,9 @@ int main(int argc, char **argv) { inference.multiturn = false; gcpp::AppArgs app(argc, argv); - app.num_threads = 2; + app.num_threads = 1; + if (argc >= 3 and argv[1] == std::string("-t")) app.num_threads = std::atoi(argv[2]); + else std::cout << "WARNING: using 1 thread for inference by default.\n" << std::endl; if (const char* err { loader.Validate() }) HWY_ABORT("%s", err); if (const char* err { inference.Validate() }) HWY_ABORT("%s", err); @@ -50,18 +83,16 @@ int main(int argc, char **argv) { std::random_device rand_dev; std::mt19937 rand_gen { rand_dev() }; - constexpr auto user_prompt { - "Write a function in C++ which checks whether a number is prime." - }; std::vector prompt { tokenize(user_prompt, model.Tokenizer()) }; size_t prompt_size { prompt.size() }; - size_t pos {0}; - auto stream_token = [prompt_size, &pos, &model](int token, float) { + std::stringstream buf; + auto stream_token = [prompt_size, &pos, &buf, &model](int token, float) { ++pos; if (pos > prompt_size and token != gcpp::EOS_ID) { std::string tok; HWY_ASSERT(model.Tokenizer().Decode(std::vector{token}, &tok)); + buf << tok; std::cout << tok << std::flush; } return true; @@ -81,4 +112,12 @@ int main(int argc, char **argv) { std::cout << " prefill_tok_sec: " << timings.prefill_tok_sec << std::endl; std::cout << " gen_tok_sec: " << timings.gen_tok_sec << std::endl; std::cout << " time_to_first_token: " << timings.time_to_first_token << std::endl; + + std::cout << "\n\n1. Pre-Process:" << std::endl; + preprocess_output(buf); + std::cout << buf.str(); + + std::cout << "\n\n2. It builds?:" << std::endl; + if (it_builds(buf)) std::cout << "Yes. Hurray!" << std::endl; + else std::cout << "No. Maybe next time..." 
<< std::endl; } From 8a9aa8bc11b8a090bbfc6193e22affdb71ee0129 Mon Sep 17 00:00:00 2001 From: iWas-Coder Date: Fri, 19 Jul 2024 23:33:12 +0200 Subject: [PATCH 06/15] Added SHA256 checksum verification file for model weights --- ai/GNUmakefile | 5 ++++- ai/model/weights.sha256 | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) create mode 100644 ai/model/weights.sha256 diff --git a/ai/GNUmakefile b/ai/GNUmakefile index a5cabe3..8f135cc 100644 --- a/ai/GNUmakefile +++ b/ai/GNUmakefile @@ -5,6 +5,7 @@ PPO_CXX = CXX PPO_LD = LD CXX = clang++ +SHA256SUM = sha256sum -c OPTIMIZATIONS = -pipe -O3 HIGHWAY_HDR_DIR = vendor/highway @@ -44,6 +45,7 @@ GEMMACPP_CXXFLAGS = -std=c++17 $(OPTIMIZATIONS) TESTGEN_BUILD_DIR = build TESTGEN_WEIGHTS_IN := $(wildcard model/weights.sbs.*) TESTGEN_WEIGHTS_OUT = $(TESTGEN_BUILD_DIR)/weights.sbs +TESTGEN_WEIGHTS_SUM = model/weights.sha256 TESTGEN_SRCS = testgen.cc TESTGEN_OBJS = $(patsubst %.cc, $(TESTGEN_BUILD_DIR)/%.o, $(TESTGEN_SRCS)) TESTGEN_CPPFLAGS = -isystem $(HIGHWAY_HDR_DIR) -isystem $(GEMMACPP_HDR_DIR) @@ -63,10 +65,11 @@ $(DIRS_OUT): @mkdir -p $@ $(TESTGEN_WEIGHTS_OUT): $(TESTGEN_WEIGHTS_IN) - @for i in $^; do \ + @[ ! -e $@ ] && for i in $^; do \ echo " $(PPO_MERGE) $$i >> $@"; \ cat $$i >> $@; \ done + @$(SHA256SUM) $(TESTGEN_WEIGHTS_SUM) >/dev/null 2>&1 $(TESTGEN_OUT): $(HIGHWAY_OBJS) $(SENTENCEPIECE_OBJS) $(GEMMACPP_OBJS) $(TESTGEN_OBJS) @echo " $(PPO_LD) $@" diff --git a/ai/model/weights.sha256 b/ai/model/weights.sha256 new file mode 100644 index 0000000..f225380 --- /dev/null +++ b/ai/model/weights.sha256 @@ -0,0 +1 @@ +4703b49c4e7177a949a5e60d91a5078f81e0d9ce80f0afa4ab4cb9af44fd334c build/weights.sbs From 5bc74bc81f6e4f47a816f2a00fc2bc8eabc1ff75 Mon Sep 17 00:00:00 2001 From: iWas-Coder Date: Sat, 20 Jul 2024 02:00:23 +0200 Subject: [PATCH 07/15] Passthrough both source file for context and test file to extend --- ai/testgen.cc | 94 ++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 71 insertions(+), 23 deletions(-) diff --git a/ai/testgen.cc b/ai/testgen.cc index de83d10..c8091bf 100644 --- a/ai/testgen.cc +++ b/ai/testgen.cc @@ -1,32 +1,53 @@ #include #include #include +#include #include "vendor/gemma.cpp/util/app.h" #include "vendor/gemma.cpp/util/args.h" #include "vendor/gemma.cpp/gemma/gemma.h" #include "vendor/gemma.cpp/evals/benchmark_helper.h" static constexpr auto system_prompt { - "Strictly follow the following instructions and rules:" - "- Just write the function that has been requested, no main, no examples, no nonsense." - "- When finished writing the function, do not repeat it, just write it once." - "- Do not explain anything, just write the code requested." - "- Do not use any markdown formatting at all, just plain text." - "- Only write the plain text code with no additional formatting." - "- Always use 2 space indenting, no tabs." - "- Do not write multiple blocks of code, just one." - "- Do not use any third-party dependency, just built-in features." 
- "Taking into consideration all of the instructions above, perform the following order as strictly as possible:" + "Strictly follow the following instructions and rules:\n" + "- Just write the function that has been requested, no main, no examples, no nonsense.\n" + "- When finished writing the function, do not repeat it, just write it once.\n" + "- Do not explain anything, just write the code requested.\n" + "- Do not use any markdown formatting at all, just plain text.\n" + "- Only write the plain text code with no additional formatting.\n" + "- Always use 2 space indenting, no tabs.\n" + "- Do not write multiple blocks of code, just one.\n" + "- Do not use any third-party dependency, just built-in features.\n" + "Take all of these instructions into consideration while performing as accurate as possible the following requests.\n" }; - -static constexpr auto user_prompt { - "C++. We have a function that computes GCD of pair of numbers. Write a unit test for it." +static constexpr auto test_code_prompt { + "Here is a C/C++ unit test translation unit:" +}; +static constexpr auto src_code_prompt { + "And this is the C/C++ translation unit that it tests:" }; +static constexpr auto request_prompt { + "Write an extended version of the unit test translation unit that includes additional unit tests that will increase the test coverage of the code under test.\n" +}; + +static inline std::string load_file_contents(const std::string &filepath) { + std::ifstream ifs { filepath }; + if (not ifs) throw std::runtime_error { "unable to open file for reading (`" + filepath + "`)" }; + std::stringstream ss; + ss << "```\n" << ifs.rdbuf() << "\n```\n"; + return ss.str(); +} -static inline std::vector tokenize(const std::string &prompt, const gcpp::GemmaTokenizer &tokenizer) { - std::string ctx { std::string(system_prompt) + "\n" + prompt + "\n" }; +static inline std::vector tokenize(const gcpp::GemmaTokenizer &tokenizer, const std::string &test_code, const std::string &src_code) { + std::string prompt { + std::string(system_prompt) + "\n" + + test_code_prompt + "\n" + + test_code + "\n" + + src_code_prompt + "\n" + + src_code + "\n" + + request_prompt + "\n" + }; std::vector tokens; - HWY_ASSERT(tokenizer.Encode(ctx, &tokens)); + HWY_ASSERT(tokenizer.Encode(prompt, &tokens)); tokens.insert(tokens.begin(), gcpp::BOS_ID); return tokens; } @@ -67,34 +88,61 @@ int main(int argc, char **argv) { inference.deterministic = false; inference.multiturn = false; - gcpp::AppArgs app(argc, argv); + gcpp::AppArgs app { argc, argv }; app.num_threads = 1; if (argc >= 3 and argv[1] == std::string("-t")) app.num_threads = std::atoi(argv[2]); else std::cout << "WARNING: using 1 thread for inference by default.\n" << std::endl; + hwy::ThreadPool thread_pool(app.num_threads); + if (app.num_threads > 10) gcpp::PinWorkersToCores(thread_pool); + + std::string src_file, test_file; + switch (argc) { + case 5: + if (argv[1] != std::string("--src")) HWY_ABORT("usage: %s [-t ] --src --test ", argv[0]); + if (not std::filesystem::exists(argv[2])) HWY_ABORT("usage: %s [-t ] --src --test ", argv[0]); + if (argv[3] != std::string("--test")) HWY_ABORT("usage: %s [-t ] --src --test ", argv[0]); + if (not std::filesystem::exists(argv[4])) HWY_ABORT("usage: %s [-t ] --src --test ", argv[0]); + src_file = argv[2]; + test_file = argv[4]; + break; + case 7: + if (argv[3] != std::string("--src")) HWY_ABORT("usage: %s [-t ] --src --test ", argv[0]); + if (not std::filesystem::exists(argv[4])) HWY_ABORT("usage: %s [-t ] --src --test ", argv[0]); + 
if (argv[5] != std::string("--test")) HWY_ABORT("usage: %s [-t ] --src --test ", argv[0]); + if (not std::filesystem::exists(argv[6])) HWY_ABORT("usage: %s [-t ] --src --test ", argv[0]); + src_file = argv[4]; + test_file = argv[6]; + break; + default: + HWY_ABORT("usage: %s [-t ] --src --test ", argv[0]); + } + + if (const char* err { loader.Validate() }) HWY_ABORT("%s:%u :: %s", __FILE__, __LINE__, err); + if (const char* err { inference.Validate() }) HWY_ABORT("%s:%u :: %s", __FILE__, __LINE__, err); - if (const char* err { loader.Validate() }) HWY_ABORT("%s", err); - if (const char* err { inference.Validate() }) HWY_ABORT("%s", err); + std::string src_file_contents { load_file_contents(src_file) }; + std::string test_file_contents { load_file_contents(test_file) }; - hwy::ThreadPool pool(app.num_threads); - if (app.num_threads > 10) gcpp::PinWorkersToCores(pool); - gcpp::Gemma model { gcpp::CreateGemma(loader, pool) }; + gcpp::Gemma model { gcpp::CreateGemma(loader, thread_pool) }; gcpp::KVCache kv_cache { gcpp::KVCache::Create(model.Info().model) }; gcpp::TimingInfo timings; std::random_device rand_dev; std::mt19937 rand_gen { rand_dev() }; - std::vector prompt { tokenize(user_prompt, model.Tokenizer()) }; + std::vector prompt { tokenize(model.Tokenizer(), test_file_contents, src_file_contents) }; size_t prompt_size { prompt.size() }; size_t pos {0}; std::stringstream buf; auto stream_token = [prompt_size, &pos, &buf, &model](int token, float) { ++pos; if (pos > prompt_size and token != gcpp::EOS_ID) { + if (pos == prompt_size + 1) std::cout << std::endl << std::flush; std::string tok; HWY_ASSERT(model.Tokenizer().Decode(std::vector{token}, &tok)); buf << tok; std::cout << tok << std::flush; } + else std::cout << "." << std::flush; return true; }; From 2dc15b4a6de80b7da95d6c1a9b31e471057dd815 Mon Sep 17 00:00:00 2001 From: iWas-Coder Date: Sat, 20 Jul 2024 02:04:19 +0200 Subject: [PATCH 08/15] Added build log when verifying hash/checksum of weights file --- ai/GNUmakefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ai/GNUmakefile b/ai/GNUmakefile index 8f135cc..3498658 100644 --- a/ai/GNUmakefile +++ b/ai/GNUmakefile @@ -1,6 +1,7 @@ PPO_MKDIR = MKDIR PPO_CLEAN = CLEAN PPO_MERGE = MERGE +PPO_HASH = HASH PPO_CXX = CXX PPO_LD = LD @@ -69,6 +70,7 @@ $(TESTGEN_WEIGHTS_OUT): $(TESTGEN_WEIGHTS_IN) echo " $(PPO_MERGE) $$i >> $@"; \ cat $$i >> $@; \ done + @echo " $(PPO_HASH) $@" @$(SHA256SUM) $(TESTGEN_WEIGHTS_SUM) >/dev/null 2>&1 $(TESTGEN_OUT): $(HIGHWAY_OBJS) $(SENTENCEPIECE_OBJS) $(GEMMACPP_OBJS) $(TESTGEN_OBJS) From 1d366d5550d78c204077cefe20c3576155c9a46c Mon Sep 17 00:00:00 2001 From: iWas-Coder Date: Sat, 20 Jul 2024 02:13:33 +0200 Subject: [PATCH 09/15] fix: factor out repeated code into macros --- ai/testgen.cc | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/ai/testgen.cc b/ai/testgen.cc index c8091bf..c9105b2 100644 --- a/ai/testgen.cc +++ b/ai/testgen.cc @@ -7,6 +7,9 @@ #include "vendor/gemma.cpp/gemma/gemma.h" #include "vendor/gemma.cpp/evals/benchmark_helper.h" +#define CARBON_AI_ABORT_ERROR(err) HWY_ABORT("%s:%u :: %s", __FILE__, __LINE__, (err)) +#define CARBON_AI_ABORT_USAGE HWY_ABORT("usage: %s [-t ] --src --test ", argv[0]) + static constexpr auto system_prompt { "Strictly follow the following instructions and rules:\n" "- Just write the function that has been requested, no main, no examples, no nonsense.\n" @@ -98,27 +101,27 @@ int main(int argc, char **argv) { std::string src_file, test_file; switch (argc) { case 5: - 
if (argv[1] != std::string("--src")) HWY_ABORT("usage: %s [-t ] --src --test ", argv[0]); - if (not std::filesystem::exists(argv[2])) HWY_ABORT("usage: %s [-t ] --src --test ", argv[0]); - if (argv[3] != std::string("--test")) HWY_ABORT("usage: %s [-t ] --src --test ", argv[0]); - if (not std::filesystem::exists(argv[4])) HWY_ABORT("usage: %s [-t ] --src --test ", argv[0]); + if (argv[1] != std::string("--src")) CARBON_AI_ABORT_USAGE; + if (not std::filesystem::exists(argv[2])) CARBON_AI_ABORT_USAGE; + if (argv[3] != std::string("--test")) CARBON_AI_ABORT_USAGE; + if (not std::filesystem::exists(argv[4])) CARBON_AI_ABORT_USAGE; src_file = argv[2]; test_file = argv[4]; break; case 7: - if (argv[3] != std::string("--src")) HWY_ABORT("usage: %s [-t ] --src --test ", argv[0]); - if (not std::filesystem::exists(argv[4])) HWY_ABORT("usage: %s [-t ] --src --test ", argv[0]); - if (argv[5] != std::string("--test")) HWY_ABORT("usage: %s [-t ] --src --test ", argv[0]); - if (not std::filesystem::exists(argv[6])) HWY_ABORT("usage: %s [-t ] --src --test ", argv[0]); + if (argv[3] != std::string("--src")) CARBON_AI_ABORT_USAGE; + if (not std::filesystem::exists(argv[4])) CARBON_AI_ABORT_USAGE; + if (argv[5] != std::string("--test")) CARBON_AI_ABORT_USAGE; + if (not std::filesystem::exists(argv[6])) CARBON_AI_ABORT_USAGE; src_file = argv[4]; test_file = argv[6]; break; default: - HWY_ABORT("usage: %s [-t ] --src --test ", argv[0]); + CARBON_AI_ABORT_USAGE; } - if (const char* err { loader.Validate() }) HWY_ABORT("%s:%u :: %s", __FILE__, __LINE__, err); - if (const char* err { inference.Validate() }) HWY_ABORT("%s:%u :: %s", __FILE__, __LINE__, err); + if (const char* err { loader.Validate() }) CARBON_AI_ABORT_ERROR(err); + if (const char* err { inference.Validate() }) CARBON_AI_ABORT_ERROR(err); std::string src_file_contents { load_file_contents(src_file) }; std::string test_file_contents { load_file_contents(test_file) }; From adf8a026c67e08c46de6e2cb91c207d855ff44c8 Mon Sep 17 00:00:00 2001 From: iWas-Coder Date: Sat, 20 Jul 2024 19:44:18 +0200 Subject: [PATCH 10/15] Migrating model from GH over to HF --- .gitattributes | 3 --- ai/model/tokenizer.spm | 3 --- ai/model/weights.sbs.00 | 3 --- ai/model/weights.sbs.01 | 3 --- ai/model/weights.sbs.02 | 3 --- ai/model/weights.sbs.03 | 3 --- ai/model/weights.sbs.04 | 3 --- ai/model/weights.sbs.05 | 3 --- ai/model/weights.sbs.06 | 3 --- ai/model/weights.sbs.07 | 3 --- ai/model/weights.sbs.08 | 3 --- ai/model/weights.sbs.09 | 3 --- ai/model/weights.sbs.10 | 3 --- ai/model/weights.sbs.11 | 3 --- ai/model/weights.sbs.12 | 3 --- ai/model/weights.sbs.13 | 3 --- ai/model/weights.sbs.14 | 3 --- ai/model/weights.sbs.15 | 3 --- ai/model/weights.sbs.16 | 3 --- ai/model/weights.sbs.17 | 3 --- ai/model/weights.sha256 | 1 - 21 files changed, 61 deletions(-) delete mode 100644 ai/model/tokenizer.spm delete mode 100644 ai/model/weights.sbs.00 delete mode 100644 ai/model/weights.sbs.01 delete mode 100644 ai/model/weights.sbs.02 delete mode 100644 ai/model/weights.sbs.03 delete mode 100644 ai/model/weights.sbs.04 delete mode 100644 ai/model/weights.sbs.05 delete mode 100644 ai/model/weights.sbs.06 delete mode 100644 ai/model/weights.sbs.07 delete mode 100644 ai/model/weights.sbs.08 delete mode 100644 ai/model/weights.sbs.09 delete mode 100644 ai/model/weights.sbs.10 delete mode 100644 ai/model/weights.sbs.11 delete mode 100644 ai/model/weights.sbs.12 delete mode 100644 ai/model/weights.sbs.13 delete mode 100644 ai/model/weights.sbs.14 delete mode 100644 ai/model/weights.sbs.15 
delete mode 100644 ai/model/weights.sbs.16 delete mode 100644 ai/model/weights.sbs.17 delete mode 100644 ai/model/weights.sha256 diff --git a/.gitattributes b/.gitattributes index d8b45ad..90eaec1 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,6 +1,3 @@ * text eol=lf *.png binary - -*.spm filter=lfs diff=lfs merge=lfs -text -*.sbs.* filter=lfs diff=lfs merge=lfs -text diff --git a/ai/model/tokenizer.spm b/ai/model/tokenizer.spm deleted file mode 100644 index 71a98ce..0000000 --- a/ai/model/tokenizer.spm +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:583f2ebd2a1936009b7da991ea255504db68c7a9713a78673d1335a87098966c -size 4241023 diff --git a/ai/model/weights.sbs.00 b/ai/model/weights.sbs.00 deleted file mode 100644 index aa8c017..0000000 --- a/ai/model/weights.sbs.00 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:aef14c4d17f7a2624b4bc9d128cad7490dbd11637ecd914273952695df69d776 -size 524288000 diff --git a/ai/model/weights.sbs.01 b/ai/model/weights.sbs.01 deleted file mode 100644 index 0a89033..0000000 --- a/ai/model/weights.sbs.01 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1435edceed0b761336abd0580ba3db7377b140c65c5772f7fec813287d8d20bd -size 524288000 diff --git a/ai/model/weights.sbs.02 b/ai/model/weights.sbs.02 deleted file mode 100644 index e714d48..0000000 --- a/ai/model/weights.sbs.02 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7a88ff63ce07b3aede351fb4fb29b124536bad59de804d73085602a3ec878617 -size 524288000 diff --git a/ai/model/weights.sbs.03 b/ai/model/weights.sbs.03 deleted file mode 100644 index b9a7f56..0000000 --- a/ai/model/weights.sbs.03 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c81c85b21fc4ccd6ed2fa1d6a7d7de560c98a26246d756fa39806589dc4ad1ea -size 524288000 diff --git a/ai/model/weights.sbs.04 b/ai/model/weights.sbs.04 deleted file mode 100644 index a017fc3..0000000 --- a/ai/model/weights.sbs.04 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:861c637fdfeec40e862b66b68ad82f7ef53738efb6aa0848b1f280e4ccaaba57 -size 524288000 diff --git a/ai/model/weights.sbs.05 b/ai/model/weights.sbs.05 deleted file mode 100644 index 1ecf4b3..0000000 --- a/ai/model/weights.sbs.05 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5da28afa300e3f8ff6f0a39b991ce77a5b2ba7b67a0e089a9cef40aaedf1361b -size 524288000 diff --git a/ai/model/weights.sbs.06 b/ai/model/weights.sbs.06 deleted file mode 100644 index 647f495..0000000 --- a/ai/model/weights.sbs.06 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2d266bdff5640ca741b7f0c673dc0e759ca757883b07a0e12c44cff0a02bdbdb -size 524288000 diff --git a/ai/model/weights.sbs.07 b/ai/model/weights.sbs.07 deleted file mode 100644 index 085744a..0000000 --- a/ai/model/weights.sbs.07 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:74865349dd89392c20fc97026637bd02c2c39b32d83d6a1839492925a320c83c -size 524288000 diff --git a/ai/model/weights.sbs.08 b/ai/model/weights.sbs.08 deleted file mode 100644 index 0b25406..0000000 --- a/ai/model/weights.sbs.08 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7ed73e823bd892896f6b912717d7d8c0ab8faf402392f8e0672255e0635e47b7 -size 524288000 diff --git a/ai/model/weights.sbs.09 b/ai/model/weights.sbs.09 deleted file mode 100644 index 
d757253..0000000 --- a/ai/model/weights.sbs.09 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b138e2db6378e627cfee8a3ff8290a07a79656209aeb8e61a681a21a83aaef04 -size 524288000 diff --git a/ai/model/weights.sbs.10 b/ai/model/weights.sbs.10 deleted file mode 100644 index 973c86b..0000000 --- a/ai/model/weights.sbs.10 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:62abe21d7197b79b878ad5d41830e1a1441f5e4f9edca6d76fc1d6eb855c252a -size 524288000 diff --git a/ai/model/weights.sbs.11 b/ai/model/weights.sbs.11 deleted file mode 100644 index acef71e..0000000 --- a/ai/model/weights.sbs.11 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5d84b1afa6954fd1566830cdaffa209917d5449280a91a369129340a75e0c3e0 -size 524288000 diff --git a/ai/model/weights.sbs.12 b/ai/model/weights.sbs.12 deleted file mode 100644 index 75f6111..0000000 --- a/ai/model/weights.sbs.12 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0c4055ce7362615498eccd0bf43595a59959f5db55184a8ffe96217cd8290597 -size 524288000 diff --git a/ai/model/weights.sbs.13 b/ai/model/weights.sbs.13 deleted file mode 100644 index bb4c2a5..0000000 --- a/ai/model/weights.sbs.13 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:51a05088b425c07c087ae82081771757c290385846f3f3db0d59596cbd089ccf -size 524288000 diff --git a/ai/model/weights.sbs.14 b/ai/model/weights.sbs.14 deleted file mode 100644 index 7e3fe27..0000000 --- a/ai/model/weights.sbs.14 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:19aaf935a8ade22bbef604ff46b6ef179dd031d61c2fd5ab996e14d8d2d10aaa -size 524288000 diff --git a/ai/model/weights.sbs.15 b/ai/model/weights.sbs.15 deleted file mode 100644 index 4c5c94a..0000000 --- a/ai/model/weights.sbs.15 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d637d17c6202583fa0499206054d5fcc38e537e00d755fa007d49c0dd676d6d2 -size 524288000 diff --git a/ai/model/weights.sbs.16 b/ai/model/weights.sbs.16 deleted file mode 100644 index f20f37f..0000000 --- a/ai/model/weights.sbs.16 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:bb41a489a3e1d89f3b52fe2fbca4b4d81bee5390e310d4e2a7a1df97c4033ae1 -size 524288000 diff --git a/ai/model/weights.sbs.17 b/ai/model/weights.sbs.17 deleted file mode 100644 index 788e26a..0000000 --- a/ai/model/weights.sbs.17 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5e94759b4f2d72f1a706fe33a39bbdad2d1b3afc95163acfe17581a04b3c69c3 -size 411397632 diff --git a/ai/model/weights.sha256 b/ai/model/weights.sha256 deleted file mode 100644 index f225380..0000000 --- a/ai/model/weights.sha256 +++ /dev/null @@ -1 +0,0 @@ -4703b49c4e7177a949a5e60d91a5078f81e0d9ce80f0afa4ab4cb9af44fd334c build/weights.sbs From 3fc4feb3fc315962780652885ede3af3368537cf Mon Sep 17 00:00:00 2001 From: iWas-Coder Date: Sat, 20 Jul 2024 20:00:44 +0200 Subject: [PATCH 11/15] Added the actual model as a submodule --- .gitmodules | 3 +++ ai/model | 1 + 2 files changed, 4 insertions(+) create mode 160000 ai/model diff --git a/.gitmodules b/.gitmodules index f43598f..6c7d5cc 100644 --- a/.gitmodules +++ b/.gitmodules @@ -7,3 +7,6 @@ [submodule "ai/vendor/gemma.cpp"] path = ai/vendor/gemma.cpp url = https://github.com/google/gemma.cpp +[submodule "ai/model"] + path = ai/model + url = https://huggingface.co/sparky-game/carbon diff --git 
a/ai/model b/ai/model new file mode 160000 index 0000000..fd286b2 --- /dev/null +++ b/ai/model @@ -0,0 +1 @@ +Subproject commit fd286b2a49aaf9bd45a2365d28d350e8b439f1aa From 63b1eacca462dd4eaa3ef7210abef4d0a401a3cd Mon Sep 17 00:00:00 2001 From: iWas-Coder Date: Sat, 20 Jul 2024 20:07:09 +0200 Subject: [PATCH 12/15] fix: do not fail because the frag. files had changed and the merged exists --- ai/GNUmakefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ai/GNUmakefile b/ai/GNUmakefile index 3498658..418daac 100644 --- a/ai/GNUmakefile +++ b/ai/GNUmakefile @@ -69,7 +69,7 @@ $(TESTGEN_WEIGHTS_OUT): $(TESTGEN_WEIGHTS_IN) @[ ! -e $@ ] && for i in $^; do \ echo " $(PPO_MERGE) $$i >> $@"; \ cat $$i >> $@; \ - done + done || true @echo " $(PPO_HASH) $@" @$(SHA256SUM) $(TESTGEN_WEIGHTS_SUM) >/dev/null 2>&1 From b53e84b08d98ac03ffa7aa4500db9701c7b0b88f Mon Sep 17 00:00:00 2001 From: iWas-Coder Date: Sat, 20 Jul 2024 20:59:39 +0200 Subject: [PATCH 13/15] Output git-style diff between original test code and generated one --- .gitignore | 1 + ai/testgen.cc | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/.gitignore b/.gitignore index 9cf433c..a05c0f3 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ ai/build/ *.xml test/carbon ai/testgen +ai/*.diff diff --git a/ai/testgen.cc b/ai/testgen.cc index c9105b2..87a6875 100644 --- a/ai/testgen.cc +++ b/ai/testgen.cc @@ -171,4 +171,12 @@ int main(int argc, char **argv) { std::cout << "\n\n2. It builds?:" << std::endl; if (it_builds(buf)) std::cout << "Yes. Hurray!" << std::endl; else std::cout << "No. Maybe next time..." << std::endl; + + std::ofstream ofs { "tmp.cc" }; + if (not ofs) throw std::runtime_error { "unable to open/create file for writing (`./tmp.cc`)" }; + ofs << buf.str(); + ofs.close(); + [[maybe_unused]] int result { std::system(("diff -u " + test_file + " tmp.cc > testgen.diff").c_str()) }; + std::remove("tmp.cc"); + std::cout << "\nOutput from BSD Carbon AI written to: `testgen.diff`" << std::endl; } From 41bba4894ca973e51e4f7f412411aa8347a7476d Mon Sep 17 00:00:00 2001 From: iWas-Coder Date: Sat, 20 Jul 2024 21:16:40 +0200 Subject: [PATCH 14/15] fix: removed unnecessary info getting outputted to stdout --- ai/testgen.cc | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/ai/testgen.cc b/ai/testgen.cc index 87a6875..87e1f0b 100644 --- a/ai/testgen.cc +++ b/ai/testgen.cc @@ -20,6 +20,7 @@ static constexpr auto system_prompt { "- Always use 2 space indenting, no tabs.\n" "- Do not write multiple blocks of code, just one.\n" "- Do not use any third-party dependency, just built-in features.\n" + "- Do not remove any original content, always extend it while maintaining original code.\n" "Take all of these instructions into consideration while performing as accurate as possible the following requests.\n" }; static constexpr auto test_code_prompt { @@ -164,13 +165,9 @@ int main(int argc, char **argv) { std::cout << " gen_tok_sec: " << timings.gen_tok_sec << std::endl; std::cout << " time_to_first_token: " << timings.time_to_first_token << std::endl; - std::cout << "\n\n1. Pre-Process:" << std::endl; preprocess_output(buf); - std::cout << buf.str(); - - std::cout << "\n\n2. It builds?:" << std::endl; - if (it_builds(buf)) std::cout << "Yes. Hurray!" << std::endl; - else std::cout << "No. Maybe next time..." 
<< std::endl; + if (it_builds(buf)) std::cout << "It builds (:D)" << std::endl; + else std::cout << "It doesn't build (D:)" << std::endl; std::ofstream ofs { "tmp.cc" }; if (not ofs) throw std::runtime_error { "unable to open/create file for writing (`./tmp.cc`)" }; From 6342d7710517e41d6a982537a6d691e27b666dc8 Mon Sep 17 00:00:00 2001 From: iWas-Coder Date: Sat, 20 Jul 2024 21:22:13 +0200 Subject: [PATCH 15/15] fix: newline is needed after printing the stats --- ai/testgen.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ai/testgen.cc b/ai/testgen.cc index 87e1f0b..a4d84ab 100644 --- a/ai/testgen.cc +++ b/ai/testgen.cc @@ -166,8 +166,8 @@ int main(int argc, char **argv) { std::cout << " time_to_first_token: " << timings.time_to_first_token << std::endl; preprocess_output(buf); - if (it_builds(buf)) std::cout << "It builds (:D)" << std::endl; - else std::cout << "It doesn't build (D:)" << std::endl; + if (it_builds(buf)) std::cout << "\nIt builds (:D)" << std::endl; + else std::cout << "\nIt doesn't build (D:)" << std::endl; std::ofstream ofs { "tmp.cc" }; if (not ofs) throw std::runtime_error { "unable to open/create file for writing (`./tmp.cc`)" };
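Usage note (illustrative only; the flags come from the argument handling added in patches 05, 07 and 09, but the concrete thread count and file paths below are placeholders): since main() hard-codes the relative paths "model/tokenizer.spm" and "build/weights.sbs", the testgen binary is presumably run from the ai/ directory after building with GNU make, along the lines of:

    ./testgen -t 4 --src ../src/example.c --test ../test/example_test.c

The program streams the generated test code to stdout, reports whether the pre-processed output compiles with clang++ (patch 05), and, from patch 13 onward, writes a unified diff between the original test file and the generated one to testgen.diff.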