forked from ggerganov/llama.cpp
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge remote-tracking branch 'upstream/concedo'
- Loading branch information
Showing
54 changed files
with
5,154 additions
and
860 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
default: koboldcpp koboldcpp_failsafe koboldcpp_openblas koboldcpp_openblas_noavx2 koboldcpp_clblast | ||
default: koboldcpp koboldcpp_failsafe koboldcpp_openblas koboldcpp_openblas_noavx2 koboldcpp_clblast koboldcpp_cublas | ||
tools: quantize_gpt2 quantize_gptj quantize_llama quantize_neox quantize_mpt | ||
dev: koboldcpp_openblas | ||
dev2: koboldcpp_clblast | ||
|
@@ -53,6 +53,9 @@ NONECFLAGS = | |
OPENBLAS_FLAGS = -DGGML_USE_OPENBLAS -I/usr/local/include/openblas | ||
CLBLAST_FLAGS = -DGGML_USE_CLBLAST | ||
FAILSAFE_FLAGS = -DUSE_FAILSAFE | ||
CUBLAS_FLAGS = -DGGML_USE_CUBLAS | ||
CUBLASLD_FLAGS = | ||
CUBLAS_OBJS = | ||
|
||
#lets try enabling everything | ||
CFLAGS += -pthread -s | ||
|
@@ -133,10 +136,9 @@ endif | |
|
||
# it is recommended to use the CMAKE file to build for cublas if you can - will likely work better | ||
ifdef LLAMA_CUBLAS | ||
CFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include | ||
CXXFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include | ||
LDFLAGS += -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/x86_64-linux/lib | ||
OBJS += ggml-cuda.o ggml_v2-cuda.o | ||
CUBLAS_FLAGS = -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include | ||
CUBLASLD_FLAGS = -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/x86_64-linux/lib | ||
CUBLAS_OBJS = ggml-cuda.o ggml_v2-cuda.o ggml_v2-cuda-legacy.o | ||
NVCC = nvcc | ||
NVCCFLAGS = --forward-unknown-to-host-compiler -arch=native | ||
ifdef LLAMA_CUDA_DMMV_X | ||
|
@@ -158,9 +160,11 @@ else | |
NVCCFLAGS += -DK_QUANTS_PER_ITERATION=2 | ||
endif | ||
ggml-cuda.o: ggml-cuda.cu ggml-cuda.h | ||
$(NVCC) $(NVCCFLAGS) $(CXXFLAGS) $(CUBLAS_CXXFLAGS) -Wno-pedantic -c $< -o $@ | ||
$(NVCC) $(NVCCFLAGS) $(CXXFLAGS) $(CUBLAS_FLAGS) $(CUBLAS_CXXFLAGS) -Wno-pedantic -c $< -o $@ | ||
ggml_v2-cuda.o: otherarch/ggml_v2-cuda.cu otherarch/ggml_v2-cuda.h | ||
$(NVCC) $(NVCCFLAGS) $(CXXFLAGS) $(CUBLAS_CXXFLAGS) -Wno-pedantic -c $< -o $@ | ||
$(NVCC) $(NVCCFLAGS) $(CXXFLAGS) $(CUBLAS_FLAGS) $(CUBLAS_CXXFLAGS) -Wno-pedantic -c $< -o $@ | ||
ggml_v2-cuda-legacy.o: otherarch/ggml_v2-cuda-legacy.cu otherarch/ggml_v2-cuda-legacy.h | ||
$(NVCC) $(NVCCFLAGS) $(CXXFLAGS) $(CUBLAS_FLAGS) $(CUBLAS_CXXFLAGS) -Wno-pedantic -c $< -o $@ | ||
endif # LLAMA_CUBLAS | ||
|
||
ifdef LLAMA_HIPBLAS | ||
|
@@ -225,15 +229,19 @@ FAILSAFE_BUILD = | |
OPENBLAS_BUILD = | ||
OPENBLAS_NOAVX2_BUILD = | ||
CLBLAST_BUILD = | ||
CLBLAST_NOAVX2_BUILD = | ||
CUBLAS_BUILD = | ||
|
||
ifeq ($(OS),Windows_NT) | ||
DEFAULT_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o [email protected] $(LDFLAGS) | ||
FAILSAFE_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o [email protected] $(LDFLAGS) | ||
OPENBLAS_BUILD = $(CXX) $(CXXFLAGS) $^ lib/libopenblas.lib -shared -o [email protected] $(LDFLAGS) | ||
OPENBLAS_NOAVX2_BUILD = $(CXX) $(CXXFLAGS) $^ lib/libopenblas.lib -shared -o [email protected] $(LDFLAGS) | ||
CLBLAST_BUILD = $(CXX) $(CXXFLAGS) $^ lib/OpenCL.lib lib/clblast.lib -shared -o [email protected] $(LDFLAGS) | ||
CLBLAST_NOAVX2_BUILD = $(CXX) $(CXXFLAGS) $^ lib/OpenCL.lib lib/clblast.lib -shared -o [email protected] $(LDFLAGS) | ||
|
||
ifdef LLAMA_CUBLAS | ||
CUBLAS_BUILD = $(CXX) $(CXXFLAGS) $(CUBLAS_FLAGS) $^ -shared -o [email protected] $(CUBLASLD_FLAGS) $(LDFLAGS) | ||
endif | ||
|
||
else | ||
DEFAULT_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o [email protected] $(LDFLAGS) | ||
FAILSAFE_BUILD = $(CXX) $(CXXFLAGS) $^ -shared -o [email protected] $(LDFLAGS) | ||
|
@@ -244,20 +252,26 @@ else | |
ifdef LLAMA_CLBLAST | ||
ifeq ($(UNAME_S),Darwin) | ||
CLBLAST_BUILD = $(CXX) $(CXXFLAGS) $^ -lclblast -framework OpenCL $(ARCH_ADD) -lopenblas -shared -o $@.so $(LDFLAGS) | ||
CLBLAST_NOAVX2_BUILD = $(CXX) $(CXXFLAGS) $^ -lclblast -framework OpenCL $(ARCH_ADD) -lopenblas -shared -o $@.so $(LDFLAGS) | ||
else | ||
CLBLAST_BUILD = $(CXX) $(CXXFLAGS) $^ -lclblast -lOpenCL $(ARCH_ADD) -lopenblas -shared -o $@.so $(LDFLAGS) | ||
CLBLAST_NOAVX2_BUILD = $(CXX) $(CXXFLAGS) $^ -lclblast -lOpenCL $(ARCH_ADD) -lopenblas -shared -o $@.so $(LDFLAGS) | ||
endif | ||
endif | ||
|
||
ifdef LLAMA_CUBLAS | ||
CUBLAS_BUILD = $(CXX) $(CXXFLAGS) $(CUBLAS_FLAGS) $^ -shared -o [email protected] $(CUBLASLD_FLAGS) $(LDFLAGS) | ||
endif | ||
|
||
ifndef LLAMA_OPENBLAS | ||
ifndef LLAMA_CLBLAST | ||
ifndef LLAMA_CUBLAS | ||
OPENBLAS_BUILD = @echo 'Your OS $(OS) does not appear to be Windows. For faster speeds, install and link a BLAS library. Set LLAMA_OPENBLAS=1 to compile with OpenBLAS support or LLAMA_CLBLAST=1 to compile with ClBlast support. This is just a reminder, not an error.' | ||
endif | ||
endif | ||
endif | ||
endif | ||
|
||
|
||
|
||
# | ||
# Print build information | ||
# | ||
|
@@ -287,8 +301,8 @@ ggml_openblas_noavx2.o: ggml.c ggml.h | |
$(CC) $(CFLAGS) $(SIMPLECFLAGS) $(OPENBLAS_FLAGS) -c $< -o $@ | ||
ggml_clblast.o: ggml.c ggml.h | ||
$(CC) $(CFLAGS) $(FULLCFLAGS) $(CLBLAST_FLAGS) -c $< -o $@ | ||
ggml_clblast_noavx2.o: ggml.c ggml.h | ||
$(CC) $(CFLAGS) $(SIMPLECFLAGS) $(CLBLAST_FLAGS) -c $< -o $@ | ||
ggml_cublas.o: ggml.c ggml.h | ||
$(CC) $(CFLAGS) $(FULLCFLAGS) $(CUBLAS_FLAGS) -c $< -o $@ | ||
|
||
#quants K | ||
k_quants.o: k_quants.c k_quants.h ggml.h ggml-cuda.h | ||
|
@@ -309,8 +323,8 @@ ggml_v2_openblas_noavx2.o: otherarch/ggml_v2.c otherarch/ggml_v2.h | |
$(CC) $(CFLAGS) $(SIMPLECFLAGS) $(OPENBLAS_FLAGS) -c $< -o $@ | ||
ggml_v2_clblast.o: otherarch/ggml_v2.c otherarch/ggml_v2.h | ||
$(CC) $(CFLAGS) $(FULLCFLAGS) $(CLBLAST_FLAGS) -c $< -o $@ | ||
ggml_v2_clblast_noavx2.o: otherarch/ggml_v2.c otherarch/ggml_v2.h | ||
$(CC) $(CFLAGS) $(SIMPLECFLAGS) $(CLBLAST_FLAGS) -c $< -o $@ | ||
ggml_v2_cublas.o: otherarch/ggml_v2.c otherarch/ggml_v2.h | ||
$(CC) $(CFLAGS) $(FULLCFLAGS) $(CUBLAS_FLAGS) -c $< -o $@ | ||
|
||
#extreme old version compat | ||
ggml_v1.o: otherarch/ggml_v1.c otherarch/ggml_v1.h | ||
|
@@ -339,9 +353,11 @@ gpttype_adapter.o: gpttype_adapter.cpp | |
$(CXX) $(CXXFLAGS) -c $< -o $@ | ||
gpttype_adapter_clblast.o: gpttype_adapter.cpp | ||
$(CXX) $(CXXFLAGS) $(CLBLAST_FLAGS) -c $< -o $@ | ||
gpttype_adapter_cublas.o: gpttype_adapter.cpp | ||
$(CXX) $(CXXFLAGS) $(CUBLAS_FLAGS) -c $< -o $@ | ||
|
||
clean: | ||
rm -vf *.o main quantize_llama quantize_gpt2 quantize_gptj quantize_neox quantize_mpt quantize-stats perplexity embedding benchmark-matmult save-load-state main.exe quantize_llama.exe quantize_gptj.exe quantize_gpt2.exe quantize_neox.exe quantize_mpt.exe koboldcpp.dll koboldcpp_openblas.dll koboldcpp_failsafe.dll koboldcpp_openblas_noavx2.dll koboldcpp_clblast.dll koboldcpp_clblast_noavx2.dll koboldcpp.so koboldcpp_openblas.so koboldcpp_failsafe.so koboldcpp_openblas_noavx2.so koboldcpp_clblast.so koboldcpp_clblast_noavx2.so | ||
rm -vf *.o main quantize_llama quantize_gpt2 quantize_gptj quantize_neox quantize_mpt quantize-stats perplexity embedding benchmark-matmult save-load-state main.exe quantize_llama.exe quantize_gptj.exe quantize_gpt2.exe quantize_neox.exe quantize_mpt.exe koboldcpp.dll koboldcpp_openblas.dll koboldcpp_failsafe.dll koboldcpp_openblas_noavx2.dll koboldcpp_clblast.dll koboldcpp_cublas.dll koboldcpp.so koboldcpp_openblas.so koboldcpp_failsafe.so koboldcpp_openblas_noavx2.so koboldcpp_clblast.so koboldcpp_cublas.so | ||
|
||
main: examples/main/main.cpp build-info.h ggml.o k_quants.o llama.o common.o $(OBJS) | ||
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) | ||
|
@@ -360,8 +376,8 @@ koboldcpp_openblas_noavx2: ggml_openblas_noavx2.o ggml_v2_openblas_noavx2.o ggml | |
$(OPENBLAS_NOAVX2_BUILD) | ||
koboldcpp_clblast: ggml_clblast.o ggml_v2_clblast.o ggml_v1.o expose.o common.o gpttype_adapter_clblast.o ggml-opencl.o ggml_v2-opencl.o ggml_v2-opencl-legacy.o k_quants.o $(OBJS) | ||
$(CLBLAST_BUILD) | ||
koboldcpp_clblast_noavx2: ggml_clblast_noavx2.o ggml_v2_clblast_noavx2.o ggml_v1_failsafe.o expose.o common.o gpttype_adapter_clblast.o ggml-opencl.o ggml_v2-opencl.o ggml_v2-opencl-legacy.o k_quants_noavx2.o $(OBJS) | ||
$(CLBLAST_NOAVX2_BUILD) | ||
koboldcpp_cublas: ggml_cublas.o ggml_v2_cublas.o ggml_v1.o expose.o common.o gpttype_adapter_cublas.o k_quants.o $(CUBLAS_OBJS) $(OBJS) | ||
$(CUBLAS_BUILD) | ||
|
||
quantize_llama: examples/quantize/quantize.cpp ggml.o llama.o k_quants.o | ||
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS) | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
PandaGPT | ||
MiniGPT-4 | ||
*.pth | ||
|
Oops, something went wrong.