@@ -64,10 +64,14 @@ TEST_TARGETS = \
6464 tests/test-tokenizer-1-spm
6565
6666# Legacy build targets that were renamed in #7809, but should still be removed when the project is cleaned
67- LEGACY_TARGETS = main quantize quantize-stats perplexity imatrix embedding vdot q8dot train-text-from-scratch convert-llama2c-to-ggml \
# Binary names used before the #7809 rename. They are no longer built under
# these names; the list exists so that `clean` can still delete stale copies.
LEGACY_TARGETS_CLEAN = \
    main quantize quantize-stats perplexity imatrix embedding vdot q8dot \
    train-text-from-scratch convert-llama2c-to-ggml \
    simple batched batched-bench save-load-state server gguf gguf-split \
    eval-callback llama-bench libllava.a llava-cli baby-llama \
    retrieval speculative infill tokenize benchmark-matmult parallel \
    finetune export-lora lookahead lookup passkey gritlm
7070
# Legacy build targets that were renamed in #7809, but for which we still want
# to build stand-in binaries that output a deprecation warning if people try to
# use them.
# To avoid clutter, replacements are only built for the most commonly used binaries.
LEGACY_TARGETS_BUILD = \
    main \
    quantize \
    perplexity \
    embedding \
    server \
    finetune
74+
7175# Deprecation aliases
7276ifdef LLAMA_CUBLAS
7377$(error LLAMA_CUBLAS is removed. Use GGML_CUDA instead.)
@@ -193,7 +197,7 @@ ifdef GGML_RPC
193197 BUILD_TARGETS += rpc-server
194198endif
195199
196- default : $(BUILD_TARGETS )
# Default goal: the regular build targets plus the legacy stand-in binaries.
# Variable references must not contain whitespace inside the parentheses,
# otherwise make looks up a different (undefined) name and expands to nothing.
default: $(BUILD_TARGETS) $(LEGACY_TARGETS_BUILD)
197201
198202test : $(TEST_TARGETS )
199203 @failures=0; \
@@ -228,7 +232,7 @@ test: $(TEST_TARGETS)
228232 fi
229233 @echo ' All tests passed.'
230234
231- all : $(BUILD_TARGETS ) $(TEST_TARGETS )
# Build everything: regular targets, test binaries and the legacy stand-in binaries.
# References are written without inner whitespace so they resolve to the
# intended variables.
all: $(BUILD_TARGETS) $(TEST_TARGETS) $(LEGACY_TARGETS_BUILD)
232236
233237ifdef RISCV_CROSS_COMPILE
234238CC := riscv64-unknown-linux-gnu-gcc
@@ -245,17 +249,22 @@ MK_CFLAGS = -std=c11 -fPIC
245249MK_CXXFLAGS = -std=c++11 -fPIC
246250MK_NVCCFLAGS = -std=c++11
247251
248- ifndef LLAMA_NO_CCACHE
# Backward compatibility: honour the old LLAMA_NO_CCACHE switch by mapping it
# onto GGML_NO_CCACHE, and record via DEPRECATE_WARNING that an old name was used.
ifdef LLAMA_NO_CCACHE
    GGML_NO_CCACHE    := 1
    DEPRECATE_WARNING := 1
endif
256+
# Optional ccache integration; the whole section is skipped when GGML_NO_CCACHE is set.
257+ ifndef GGML_NO_CCACHE
# Look up the ccache executable at parse time; CCACHE is empty if `which` prints nothing.
249258CCACHE := $(shell which ccache)
250259ifdef CCACHE
# Exported so the setting is visible in the environment of the commands make runs.
251260export CCACHE_SLOPPINESS = time_macros
252- $(info I ccache found, compilation results will be cached. Disable with LLAMA_NO_CCACHE .)
261+ $(info I ccache found, compilation results will be cached. Disable with GGML_NO_CCACHE .)
# Prefix both compiler commands with the ccache executable.
253262CC := $(CCACHE ) $(CC )
254263CXX := $(CCACHE ) $(CXX )
255264else
# ccache is optional: only print a hint, the compilers are left untouched.
256265$(info I ccache not found. Consider installing it for faster compilation.)
257266endif # CCACHE
258- endif # LLAMA_NO_CCACHE
267+ endif # GGML_NO_CCACHE
259268
260269# clock_gettime came in POSIX.1b (1993)
261270# CLOCK_MONOTONIC came in POSIX.1-2001 / SUSv3 as optional
@@ -545,7 +554,7 @@ endif # GGML_BLIS
545554
546555ifndef GGML_NO_LLAMAFILE
547556 MK_CPPFLAGS += -DGGML_USE_LLAMAFILE
548- OBJ_GGML += ggml/src/sgemm.o
# Add the llamafile sgemm object to the ggml object list.
# The path must be a single word: a space inside it would add two bogus entries.
OBJ_GGML += ggml/src/llamafile/sgemm.o
549558endif
550559
551560ifdef GGML_RPC
@@ -826,7 +835,8 @@ OBJ_GGML += \
826835 ggml/src/ggml.o \
827836 ggml/src/ggml-alloc.o \
828837 ggml/src/ggml-backend.o \
829- ggml/src/ggml-quants.o
838+ ggml/src/ggml-quants.o \
839+ ggml/src/ggml-aarch64.o
830840
831841OBJ_LLAMA = \
832842 src/llama.o \
@@ -926,6 +936,7 @@ $(info - LLAMA_NO_LLAMAFILE)
926936$(info - LLAMA_NO_ACCELERATE)
927937$(info - LLAMA_NO_OPENMP)
928938$(info - LLAMA_NO_METAL)
939+ $(info - LLAMA_NO_CCACHE)
929940$(info )
930941endif
931942
@@ -959,15 +970,22 @@ ggml/src/ggml-quants.o: \
959970 ggml/src/ggml-common.h
960971 $(CC ) $(CFLAGS ) -c $< -o $@
961972
# Compile ggml-aarch64.c with the C compiler.
# The headers are listed as prerequisites so that editing any of them triggers a
# rebuild; only the first prerequisite ($<, the .c file) is passed to the compiler.
ggml/src/ggml-aarch64.o: \
	ggml/src/ggml-aarch64.c \
	ggml/include/ggml.h \
	ggml/src/ggml-aarch64.h \
	ggml/src/ggml-common.h
	$(CC) $(CFLAGS) -c $< -o $@
979+
# Compile ggml-blas.cpp with the C++ compiler.
# The header is a prerequisite only for rebuild tracking; $< is the .cpp file.
ggml/src/ggml-blas.o: \
	ggml/src/ggml-blas.cpp \
	ggml/include/ggml-blas.h
	$(CXX) $(CXXFLAGS) -c $< -o $@
966984
967985ifndef GGML_NO_LLAMAFILE
968- ggml/src/sgemm.o : \
969- ggml/src/sgemm.cpp \
970- ggml/src/sgemm.h \
# Compile the llamafile sgemm source with the C++ compiler.
# Target and prerequisite paths must not contain spaces; make would otherwise
# treat "ggml/src/llamafile/" and "sgemm.o" as two separate words.
ggml/src/llamafile/sgemm.o: \
	ggml/src/llamafile/sgemm.cpp \
	ggml/src/llamafile/sgemm.h \
	ggml/include/ggml.h
	$(CXX) $(CXXFLAGS) -c $< -o $@
973991endif # GGML_NO_LLAMAFILE
@@ -1092,7 +1110,7 @@ clean:
10921110 rm -vrf ggml/src/ggml-cuda/template-instances/* .o
10931111 rm -rvf $(BUILD_TARGETS )
10941112 rm -rvf $(TEST_TARGETS )
1095- rm -rvf $(LEGACY_TARGETS )
1113+ rm -rvf $(LEGACY_TARGETS_CLEAN )
10961114 find examples pocs -type f -name " *.o" -delete
10971115
10981116#
@@ -1488,3 +1506,61 @@ llama-q8dot: pocs/vdot/q8dot.cpp ggml/src/ggml.o \
14881506 $(OBJ_GGML )
14891507 $(CXX ) $(CXXFLAGS ) -c $< -o $(call GET_OBJ_FILE, $< )
14901508 $(CXX ) $(CXXFLAGS ) $(filter-out $< ,$^ ) $(call GET_OBJ_FILE, $< ) -o $@ $(LDFLAGS )
1509+
1510+ #
1511+ # Deprecated binaries that we want to keep around long enough for people to migrate to the new filenames, then these can be removed.
1512+ #
# The legacy binary names are declared phony so that make evaluates their rules
# on every run instead of considering an existing file up to date.
.PHONY: main server quantize perplexity embedding finetune
1515+
# NOTE: The deprecation-warning `main` and `server` binaries are currently always built to help users migrate.
# Eventually we will want to stop building these targets all the time.
#
# Recipe: compile the deprecation-warning source to its object file, link it
# under the legacy name `main`, then print a notice pointing at the new name.
main: examples/deprecation-warning/deprecation-warning.cpp
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
	@echo " NOTICE: The 'main' binary is deprecated. Please use 'llama-cli' instead."
1522+
# Stand-in for the legacy `server` binary: compile the deprecation-warning
# source, link it under the old name, then print a notice pointing at the new name.
# The filter-out pattern is `%.h` (one word); written as `% .h`, the lone `%`
# would match every prerequisite.
server: examples/deprecation-warning/deprecation-warning.cpp
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
	@echo " NOTICE: The 'server' binary is deprecated. Please use 'llama-server' instead."
1527+
# Stand-in for the legacy `quantize` binary.
# The recipe is only enabled when a file named `quantize` already exists in the
# working directory at parse time; in that case it is replaced by the
# deprecation-warning program and a warning is printed. Without such a file the
# rule has no recipe and does nothing.
# The conditional must not have whitespace before the closing parenthesis,
# otherwise the comparison against the empty string never matches.
quantize: examples/deprecation-warning/deprecation-warning.cpp
ifneq (,$(wildcard quantize))
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
	@echo "# ########"
	@echo "WARNING: The 'quantize' binary is deprecated. Please use 'llama-quantize' instead."
	@echo " Remove the 'quantize' binary to remove this warning."
	@echo "# ########"
endif
1537+
# Stand-in for the legacy `perplexity` binary.
# The recipe is only enabled when a file named `perplexity` already exists in
# the working directory at parse time; in that case it is replaced by the
# deprecation-warning program and a warning is printed. Without such a file the
# rule has no recipe and does nothing.
# The conditional must not have whitespace before the closing parenthesis,
# otherwise the comparison against the empty string never matches.
perplexity: examples/deprecation-warning/deprecation-warning.cpp
ifneq (,$(wildcard perplexity))
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
	@echo "# ########"
	@echo "WARNING: The 'perplexity' binary is deprecated. Please use 'llama-perplexity' instead."
	@echo " Remove the 'perplexity' binary to remove this warning."
	@echo "# ########"
endif
1547+
# Stand-in for the legacy `embedding` binary.
# The recipe is only enabled when a file named `embedding` already exists in
# the working directory at parse time; in that case it is replaced by the
# deprecation-warning program and a warning is printed. Without such a file the
# rule has no recipe and does nothing.
# The conditional must not have whitespace before the closing parenthesis,
# otherwise the comparison against the empty string never matches.
embedding: examples/deprecation-warning/deprecation-warning.cpp
ifneq (,$(wildcard embedding))
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
	@echo "# ########"
	@echo "WARNING: The 'embedding' binary is deprecated. Please use 'llama-embedding' instead."
	@echo " Remove the 'embedding' binary to remove this warning."
	@echo "# ########"
endif
1557+
# Stand-in for the legacy `finetune` binary.
# The recipe is only enabled when a file named `finetune` already exists in the
# working directory at parse time; in that case it is replaced by the
# deprecation-warning program and a warning is printed. Without such a file the
# rule has no recipe and does nothing.
# The conditional must not have whitespace before the closing parenthesis,
# otherwise the comparison against the empty string never matches.
finetune: examples/deprecation-warning/deprecation-warning.cpp
ifneq (,$(wildcard finetune))
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
	@echo "# ########"
	@echo "WARNING: The 'finetune' binary is deprecated. Please use 'llama-finetune' instead."
	@echo " Remove the 'finetune' binary to remove this warning."
	@echo "# ########"
endif
0 commit comments