diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 00000000..a80f848d
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "lib/simde"]
+ path = lib/simde
+ url = https://github.com/nemequ/simde.git
diff --git a/Makefile b/Makefile
index 18622f51..7d2afc99 100644
--- a/Makefile
+++ b/Makefile
@@ -1,26 +1,40 @@
CFLAGS= -g -Wall -O2 -Wc++-compat #-Wextra
CPPFLAGS= -DHAVE_KALLOC
INCLUDES=
-OBJS= kthread.o kalloc.o misc.o bseq.o sketch.o sdust.o options.o index.o chain.o align.o hit.o map.o format.o pe.o esterr.o splitidx.o ksw2_ll_sse.o
+OBJS= kthread.o kalloc.o misc.o bseq.o sketch.o sdust.o options.o index.o chain.o align.o hit.o map.o format.o pe.o esterr.o splitidx.o
PROG= minimap2
PROG_EXTRA= sdust minimap2-lite
LIBS= -lm -lz -lpthread
+
+ifeq ($(no_simd),) # if no_simd is not defined
ifeq ($(arm_neon),) # if arm_neon is not defined
+ OBJS+=ksw2_ll_sse.o
ifeq ($(sse2only),) # if sse2only is not defined
OBJS+=ksw2_extz2_sse41.o ksw2_extd2_sse41.o ksw2_exts2_sse41.o ksw2_extz2_sse2.o ksw2_extd2_sse2.o ksw2_exts2_sse2.o ksw2_dispatch.o
else # if sse2only is defined
OBJS+=ksw2_extz2_sse.o ksw2_extd2_sse.o ksw2_exts2_sse.o
endif
else # if arm_neon is defined
- OBJS+=ksw2_extz2_neon.o ksw2_extd2_neon.o ksw2_exts2_neon.o
- INCLUDES+=-Isse2neon
+ OBJS+=ksw2_ll_neon.o ksw2_extz2_neon.o ksw2_extd2_neon.o ksw2_exts2_neon.o
+ifeq ($(simde),) # arm_neon without SIMDe -> use sse2neon
+ INCLUDES+=-Isse2neon
+endif
ifeq ($(aarch64),) #if aarch64 is not defined
CFLAGS+=-D_FILE_OFFSET_BITS=64 -mfpu=neon -fsigned-char
else #if aarch64 is defined
CFLAGS+=-D_FILE_OFFSET_BITS=64 -fsigned-char
endif
endif
+else # no_simd is defined: build the portable objects; no_simd can be used only with SIMDe
+ OBJS+=ksw2_ll_nosimd.o ksw2_extz2_nosimd.o ksw2_extd2_nosimd.o ksw2_exts2_nosimd.o
+ simde=1
+endif
+
+ifneq ($(simde),) # if simde is defined (on the command line, or forced by no_simd above)
+ CFLAGS+=-DSIMDE_ENABLE_NATIVE_ALIASES -DUSE_SIMDE
+ INCLUDES+=-Ilib/simde
+endif
ifneq ($(asan),)
CFLAGS+=-fsanitize=address
@@ -56,10 +70,8 @@ sdust:sdust.c kalloc.o kalloc.h kdq.h kvec.h kseq.h ketopt.h sdust.h
# SSE-specific targets on x86/x86_64
-ifeq ($(arm_neon),) # if arm_neon is defined, compile this target with the default setting (i.e. no -msse2)
ksw2_ll_sse.o:ksw2_ll_sse.c ksw2.h kalloc.h
$(CC) -c $(CFLAGS) -msse2 $(CPPFLAGS) $(INCLUDES) $< -o $@
-endif
ksw2_extz2_sse41.o:ksw2_extz2_sse.c ksw2.h kalloc.h
$(CC) -c $(CFLAGS) -msse4.1 $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) $< -o $@
@@ -84,6 +96,9 @@ ksw2_dispatch.o:ksw2_dispatch.c ksw2.h
# NEON-specific targets on ARM
+# ksw2_ll for NEON builds: same source as ksw2_ll_sse.o, compiled without -msse2
+ksw2_ll_neon.o:ksw2_ll_sse.c ksw2.h kalloc.h
+ $(CC) -c $(CFLAGS) $(CPPFLAGS) $(INCLUDES) $< -o $@
+
ksw2_extz2_neon.o:ksw2_extz2_sse.c ksw2.h kalloc.h
$(CC) -c $(CFLAGS) $(CPPFLAGS) -DKSW_SSE2_ONLY -D__SSE2__ $(INCLUDES) $< -o $@
@@ -93,6 +108,20 @@ ksw2_extd2_neon.o:ksw2_extd2_sse.c ksw2.h kalloc.h
ksw2_exts2_neon.o:ksw2_exts2_sse.c ksw2.h kalloc.h
$(CC) -c $(CFLAGS) $(CPPFLAGS) -DKSW_SSE2_ONLY -D__SSE2__ $(INCLUDES) $< -o $@
+# no-SIMD version (selected by no_simd=1): the SSE sources rebuilt with -DSIMDE_NO_NATIVE
+
+ksw2_ll_nosimd.o:ksw2_ll_sse.c ksw2.h kalloc.h
+ $(CC) -c $(CFLAGS) $(CPPFLAGS) -DSIMDE_NO_NATIVE $(INCLUDES) $< -o $@
+
+ksw2_extz2_nosimd.o:ksw2_extz2_sse.c ksw2.h kalloc.h
+ $(CC) -c $(CFLAGS) $(CPPFLAGS) -DKSW_SSE2_ONLY -D__SSE2__ -DSIMDE_NO_NATIVE $(INCLUDES) $< -o $@
+
+ksw2_extd2_nosimd.o:ksw2_extd2_sse.c ksw2.h kalloc.h
+ $(CC) -c $(CFLAGS) $(CPPFLAGS) -DKSW_SSE2_ONLY -D__SSE2__ -DSIMDE_NO_NATIVE $(INCLUDES) $< -o $@
+
+ksw2_exts2_nosimd.o:ksw2_exts2_sse.c ksw2.h kalloc.h
+ $(CC) -c $(CFLAGS) $(CPPFLAGS) -DKSW_SSE2_ONLY -D__SSE2__ -DSIMDE_NO_NATIVE $(INCLUDES) $< -o $@
+
# other non-file targets
clean:
diff --git a/README.md b/README.md
index addeb2c1..a62d189b 100644
--- a/README.md
+++ b/README.md
@@ -82,6 +82,10 @@ to disable SSE4 code, which will make minimap2 slightly slower.
Minimap2 also works with ARM CPUs supporting the NEON instruction sets. To
compile for 32 bit ARM architectures (such as ARMv7), use `make arm_neon=1`. To compile for for 64 bit ARM architectures (such as ARMv8), use `make arm_neon=1 aarch64=1`.
+Minimap2 can use the [SIMD Everywhere (SIMDe)](https://github.com/nemequ/simde) library to port its SIMD code
+to different SIMD instruction sets. SIMDe is included as a git submodule in `lib/simde`; fetch it with `git submodule update --init` first. To compile using SIMDe, use `make simde=1`. To compile for ARM CPUs, add `simde=1` to the commands given above.
+SIMDe also provides a non-SIMD implementation, which is built with `make no_simd=1`.
+
### General usage
Without any options, minimap2 takes a reference database and a query sequence
diff --git a/ksw2_extd2_sse.c b/ksw2_extd2_sse.c
index b578274a..162e9e26 100644
--- a/ksw2_extd2_sse.c
+++ b/ksw2_extd2_sse.c
@@ -4,15 +4,23 @@
#include "ksw2.h"
#ifdef __SSE2__
+#ifdef USE_SIMDE /* defined by the Makefile for SIMDe builds; headers come from lib/simde */
+#include <simde/x86/sse2.h>
+#else
#include <emmintrin.h>
+#endif
#ifdef KSW_SSE2_ONLY
#undef __SSE4_1__
#endif
#ifdef __SSE4_1__
+#ifdef USE_SIMDE /* SIMDe counterpart of the SSE4.1 intrinsics header */
+#include <simde/x86/sse4.1.h>
+#else
#include <smmintrin.h>
#endif
+#endif
#ifdef KSW_CPU_DISPATCH
#ifdef __SSE4_1__
diff --git a/ksw2_exts2_sse.c b/ksw2_exts2_sse.c
index e7984c66..4157e382 100644
--- a/ksw2_exts2_sse.c
+++ b/ksw2_exts2_sse.c
@@ -4,15 +4,22 @@
#include "ksw2.h"
#ifdef __SSE2__
+#ifdef USE_SIMDE /* defined by the Makefile for SIMDe builds; headers come from lib/simde */
+#include <simde/x86/sse2.h>
+#else
#include <emmintrin.h>
-
+#endif
#ifdef KSW_SSE2_ONLY
#undef __SSE4_1__
#endif
#ifdef __SSE4_1__
+#ifdef USE_SIMDE /* SIMDe counterpart of the SSE4.1 intrinsics header */
+#include <simde/x86/sse4.1.h>
+#else
#include <smmintrin.h>
#endif
+#endif
#ifdef KSW_CPU_DISPATCH
#ifdef __SSE4_1__
diff --git a/ksw2_extz2_sse.c b/ksw2_extz2_sse.c
index 02bb4c2a..ad191314 100644
--- a/ksw2_extz2_sse.c
+++ b/ksw2_extz2_sse.c
@@ -3,15 +3,23 @@
#include "ksw2.h"
#ifdef __SSE2__
+#ifdef USE_SIMDE /* defined by the Makefile for SIMDe builds; headers come from lib/simde */
+#include <simde/x86/sse2.h>
+#else
#include <emmintrin.h>
+#endif
#ifdef KSW_SSE2_ONLY
#undef __SSE4_1__
#endif
#ifdef __SSE4_1__
+#ifdef USE_SIMDE /* SIMDe counterpart of the SSE4.1 intrinsics header */
+#include <simde/x86/sse4.1.h>
+#else
#include <smmintrin.h>
#endif
+#endif
#ifdef KSW_CPU_DISPATCH
#ifdef __SSE4_1__
diff --git a/ksw2_ll_sse.c b/ksw2_ll_sse.c
index 469de520..14b9b50a 100644
--- a/ksw2_ll_sse.c
+++ b/ksw2_ll_sse.c
@@ -1,9 +1,14 @@
#include
#include
#include
-#include <emmintrin.h>
#include "ksw2.h"
+#ifdef USE_SIMDE /* defined by the Makefile for SIMDe builds; header comes from lib/simde */
+#include <simde/x86/sse2.h>
+#else
+#include <emmintrin.h>
+#endif
+
#ifdef __GNUC__
#define LIKELY(x) __builtin_expect((x),1)
#define UNLIKELY(x) __builtin_expect((x),0)
diff --git a/lib/simde b/lib/simde
new file mode 160000
index 00000000..b30129b3
--- /dev/null
+++ b/lib/simde
@@ -0,0 +1 @@
+Subproject commit b30129b3b48a6823013da2b309c50a081177b6b8