diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 00000000..a80f848d
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "lib/simde"]
+	path = lib/simde
+	url = https://github.com/nemequ/simde.git
diff --git a/Makefile b/Makefile
index 18622f51..7d2afc99 100644
--- a/Makefile
+++ b/Makefile
@@ -1,26 +1,40 @@
 CFLAGS=		-g -Wall -O2 -Wc++-compat #-Wextra
 CPPFLAGS=	-DHAVE_KALLOC
 INCLUDES=
-OBJS=		kthread.o kalloc.o misc.o bseq.o sketch.o sdust.o options.o index.o chain.o align.o hit.o map.o format.o pe.o esterr.o splitidx.o ksw2_ll_sse.o
+OBJS=		kthread.o kalloc.o misc.o bseq.o sketch.o sdust.o options.o index.o chain.o align.o hit.o map.o format.o pe.o esterr.o splitidx.o
 PROG=		minimap2
 PROG_EXTRA=	sdust minimap2-lite
 LIBS=		-lm -lz -lpthread
 
+
+ifeq ($(no_simd),) # if no_simd is not defined
 ifeq ($(arm_neon),) # if arm_neon is not defined
+	OBJS+=ksw2_ll_sse.o
 ifeq ($(sse2only),) # if sse2only is not defined
 	OBJS+=ksw2_extz2_sse41.o ksw2_extd2_sse41.o ksw2_exts2_sse41.o ksw2_extz2_sse2.o ksw2_extd2_sse2.o ksw2_exts2_sse2.o ksw2_dispatch.o
 else                # if sse2only is defined
 	OBJS+=ksw2_extz2_sse.o ksw2_extd2_sse.o ksw2_exts2_sse.o
 endif
 else				# if arm_neon is defined
-	OBJS+=ksw2_extz2_neon.o ksw2_extd2_neon.o ksw2_exts2_neon.o
-	INCLUDES+=-Isse2neon
+	OBJS+=ksw2_ll_neon.o ksw2_extz2_neon.o ksw2_extd2_neon.o ksw2_exts2_neon.o
+ifeq ($(simde),) # arm_neon without SIMDe -> use sse2neon
+	INCLUDES+=-Isse2neon
+endif
 ifeq ($(aarch64),)	#if aarch64 is not defined
 	CFLAGS+=-D_FILE_OFFSET_BITS=64 -mfpu=neon -fsigned-char
 else				#if aarch64 is defined
 	CFLAGS+=-D_FILE_OFFSET_BITS=64 -fsigned-char
 endif
 endif
+else				# if no_simd is defined; this mode can be used only with SIMDe, so simde is forced on
+	OBJS+=ksw2_ll_nosimd.o ksw2_extz2_nosimd.o ksw2_extd2_nosimd.o ksw2_exts2_nosimd.o
+	simde=1
+endif
+
+ifneq ($(simde),) # if simde is defined
+	CFLAGS+=-DSIMDE_ENABLE_NATIVE_ALIASES -DUSE_SIMDE
+	INCLUDES+=-Ilib/simde
+endif
 
 ifneq ($(asan),)
 	CFLAGS+=-fsanitize=address
@@ -56,10 +70,8 @@ sdust:sdust.c kalloc.o kalloc.h kdq.h kvec.h kseq.h ketopt.h sdust.h
 
 # SSE-specific targets on x86/x86_64
 
-ifeq ($(arm_neon),) # if arm_neon is defined, compile this target with the default setting (i.e. no -msse2)
 ksw2_ll_sse.o:ksw2_ll_sse.c ksw2.h kalloc.h
 		$(CC) -c $(CFLAGS) -msse2 $(CPPFLAGS) $(INCLUDES) $< -o $@
-endif
 
 ksw2_extz2_sse41.o:ksw2_extz2_sse.c ksw2.h kalloc.h
 		$(CC) -c $(CFLAGS) -msse4.1 $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) $< -o $@
@@ -84,6 +96,9 @@ ksw2_dispatch.o:ksw2_dispatch.c ksw2.h
 
 # NEON-specific targets on ARM
 
+ksw2_ll_neon.o:ksw2_ll_sse.c ksw2.h kalloc.h
+		$(CC) -c $(CFLAGS) $(CPPFLAGS) $(INCLUDES) $< -o $@
+
 ksw2_extz2_neon.o:ksw2_extz2_sse.c ksw2.h kalloc.h
 		$(CC) -c $(CFLAGS) $(CPPFLAGS) -DKSW_SSE2_ONLY -D__SSE2__ $(INCLUDES) $< -o $@
 
@@ -93,6 +108,20 @@ ksw2_extd2_neon.o:ksw2_extd2_sse.c ksw2.h kalloc.h
 ksw2_exts2_neon.o:ksw2_exts2_sse.c ksw2.h kalloc.h
 		$(CC) -c $(CFLAGS) $(CPPFLAGS) -DKSW_SSE2_ONLY -D__SSE2__ $(INCLUDES) $< -o $@
 
+# no-SIMD targets: the same sources compiled with -DSIMDE_NO_NATIVE (selected by no_simd=1, which forces simde=1)
+
+ksw2_ll_nosimd.o:ksw2_ll_sse.c ksw2.h kalloc.h
+		$(CC) -c $(CFLAGS) $(CPPFLAGS) -DSIMDE_NO_NATIVE $(INCLUDES) $< -o $@
+
+ksw2_extz2_nosimd.o:ksw2_extz2_sse.c ksw2.h kalloc.h
+		$(CC) -c $(CFLAGS) $(CPPFLAGS) -DKSW_SSE2_ONLY -D__SSE2__ -DSIMDE_NO_NATIVE $(INCLUDES) $< -o $@
+
+ksw2_extd2_nosimd.o:ksw2_extd2_sse.c ksw2.h kalloc.h
+		$(CC) -c $(CFLAGS) $(CPPFLAGS) -DKSW_SSE2_ONLY -D__SSE2__ -DSIMDE_NO_NATIVE $(INCLUDES) $< -o $@
+
+ksw2_exts2_nosimd.o:ksw2_exts2_sse.c ksw2.h kalloc.h
+		$(CC) -c $(CFLAGS) $(CPPFLAGS) -DKSW_SSE2_ONLY -D__SSE2__ -DSIMDE_NO_NATIVE $(INCLUDES) $< -o $@
+
 # other non-file targets
 
 clean:
diff --git a/README.md b/README.md
index addeb2c1..a62d189b 100644
--- a/README.md
+++ b/README.md
@@ -82,6 +82,10 @@ to disable SSE4 code, which will make minimap2 slightly slower.
 Minimap2 also works with ARM CPUs supporting the NEON instruction sets. To compile for 32 bit ARM architectures (such as ARMv7), use `make arm_neon=1`.
-To compile for for 64 bit ARM architectures (such as ARMv8), use `make arm_neon=1 aarch64=1`.
+To compile for 64 bit ARM architectures (such as ARMv8), use `make arm_neon=1 aarch64=1`.
 
+Minimap2 can use the [SIMD Everywhere (SIMDe)](https://github.com/nemequ/simde) library (the `lib/simde` git submodule, which must be fetched first)
+to port its SIMD code to other SIMD instruction sets. To compile using SIMDe, use `make simde=1`. To compile for ARM CPUs with SIMDe, add `simde=1` to the ARM commands given above.
+SIMDe also enables a non-SIMD build: use `make no_simd=1`.
+
 ### General usage
 
 Without any options, minimap2 takes a reference database and a query sequence
diff --git a/ksw2_extd2_sse.c b/ksw2_extd2_sse.c
index b578274a..162e9e26 100644
--- a/ksw2_extd2_sse.c
+++ b/ksw2_extd2_sse.c
@@ -4,15 +4,23 @@
 #include "ksw2.h"
 
 #ifdef __SSE2__
+#ifdef USE_SIMDE
+#include <simde/x86/sse2.h>
+#else
 #include <emmintrin.h>
+#endif
 
 #ifdef KSW_SSE2_ONLY
 #undef __SSE4_1__
 #endif
 
 #ifdef __SSE4_1__
+#ifdef USE_SIMDE
+#include <simde/x86/sse4.1.h>
+#else
 #include <smmintrin.h>
 #endif
+#endif
 
 #ifdef KSW_CPU_DISPATCH
 #ifdef __SSE4_1__
diff --git a/ksw2_exts2_sse.c b/ksw2_exts2_sse.c
index e7984c66..4157e382 100644
--- a/ksw2_exts2_sse.c
+++ b/ksw2_exts2_sse.c
@@ -4,15 +4,23 @@
 #include "ksw2.h"
 
 #ifdef __SSE2__
+#ifdef USE_SIMDE
+#include <simde/x86/sse2.h>
+#else
 #include <emmintrin.h>
+#endif
 
 #ifdef KSW_SSE2_ONLY
 #undef __SSE4_1__
 #endif
 
 #ifdef __SSE4_1__
+#ifdef USE_SIMDE
+#include <simde/x86/sse4.1.h>
+#else
 #include <smmintrin.h>
 #endif
+#endif
 
 #ifdef KSW_CPU_DISPATCH
 #ifdef __SSE4_1__
diff --git a/ksw2_extz2_sse.c b/ksw2_extz2_sse.c
index 02bb4c2a..ad191314 100644
--- a/ksw2_extz2_sse.c
+++ b/ksw2_extz2_sse.c
@@ -3,15 +3,23 @@
 #include "ksw2.h"
 
 #ifdef __SSE2__
+#ifdef USE_SIMDE
+#include <simde/x86/sse2.h>
+#else
 #include <emmintrin.h>
+#endif
 
 #ifdef KSW_SSE2_ONLY
 #undef __SSE4_1__
 #endif
 
 #ifdef __SSE4_1__
+#ifdef USE_SIMDE
+#include <simde/x86/sse4.1.h>
+#else
 #include <smmintrin.h>
 #endif
+#endif
 
 #ifdef KSW_CPU_DISPATCH
 #ifdef __SSE4_1__
diff --git a/ksw2_ll_sse.c b/ksw2_ll_sse.c
index 469de520..14b9b50a 100644
--- a/ksw2_ll_sse.c
+++ b/ksw2_ll_sse.c
@@ -1,9 +1,14 @@
 #include <stdlib.h>
 #include <stdint.h>
 #include <string.h>
-#include <emmintrin.h>
 #include "ksw2.h"
 
+#ifdef USE_SIMDE
+#include <simde/x86/sse2.h>
+#else
+#include <emmintrin.h>
+#endif
+
 #ifdef __GNUC__
 #define LIKELY(x) __builtin_expect((x),1)
 #define UNLIKELY(x) __builtin_expect((x),0)
diff --git a/lib/simde b/lib/simde
new file mode 160000
index 00000000..b30129b3
--- /dev/null
+++ b/lib/simde
@@ -0,0 +1 @@
+Subproject commit b30129b3b48a6823013da2b309c50a081177b6b8