diff --git a/app/Makefile b/app/Makefile index d55f6591..e73cd652 100644 --- a/app/Makefile +++ b/app/Makefile @@ -46,11 +46,19 @@ MORE_OBJECTS= NO_LTO=0 ifeq ($(DEBUG),1) + NO_LTO=1 DBG_SUBDIR+=dbg else DBG_SUBDIR+=rel endif +ifeq ($(NO_LTO),1) + CFLAGS+= -fno-lto +else + CFLAGS+=${CFLAGS_LTO} + LDFLAGS_OPT+= ${LDFLAGS_OPT_LTO} +endif + NO_STDIN= ifeq ($(NO_STDIN),1) CFLAGS+= -DNO_STDIN @@ -91,7 +99,6 @@ ifeq ($(DEBUG),0) $(echo "No profiling set. To use PGO, compile with PGO=1, then run with data, then compile again with PGO=2") endif endif - else CFLAGS += -g endif @@ -161,12 +168,6 @@ endif CLI_OBJECTS=$(addprefix ${CLI_OBJ_PFX},$(addsuffix .o,${CLI_SOURCES})) CFLAGS+=${CFLAGS_AUTO} -ifeq ($(NO_LTO),1) - CFLAGS+= -fno-lto -else - CFLAGS+=${CFLAGS_LTO} - LDFLAGS_OPT+= ${LDFLAGS_OPT_LTO} -endif ifeq ($(VERBOSE),1) CFLAGS+= ${CFLAGS_VECTORIZE_OPTIMIZED} ${CFLAGS_VECTORIZE_MISSED} ${CFLAGS_VECTORIZE_ALL} diff --git a/app/benchmark/README.md b/app/benchmark/README.md index 6af86e8f..57ace323 100644 --- a/app/benchmark/README.md +++ b/app/benchmark/README.md @@ -87,7 +87,7 @@ The following utilities were compared: * Our test system shown in the above graph was a pre-M1 OSX MBA. We also tested on Linux, BSD and Windows. In each case, zsv was the fastest, but in some cases the margin - was smaller (e.g. 20%+ instead of 50% vs xsv on Win). + was smaller (e.g. 20%+ instead of 50% vs xsv on Win). ### Results in above graph (pre-M1 OSX MBA) diff --git a/app/builtin/help.c b/app/builtin/help.c index ebae9fd4..f6bcce3a 100644 --- a/app/builtin/help.c +++ b/app/builtin/help.c @@ -16,7 +16,17 @@ static int main_help(int argc, const char *argv[]) { "", "Usage:", " zsv version: display version info (and if applicable, extension info)", +#ifndef __EMSCRIPTEN__ " zsv (un)register []: (un)register an extension", + " Registration info is saved in zsv.ini located in a directory determined as:", + " ZSV_CONFIG_DIR environment variable value, if set", +# if defined(_WIN32) + " LOCALAPPDATA environment variable value, if set", + " otherwise, C:\\temp", +#else + " otherwise, " PREFIX "/etc", +# endif +#endif " zsv help []", " zsv : run a command on data (see below for details)", " zsv - : invoke command 'cmd' of extension 'id'", @@ -72,8 +82,8 @@ static int main_help(int argc, const char *argv[]) { for(struct zsv_ext_command *cmd = ext->commands; cmd; cmd = cmd->next) fprintf(f, " %s-%s%s%s\n", ext->id, cmd->id, cmd->help ? ": " : "", cmd->help ? cmd->help : ""); } - config_free(&config); } + config_free(&config); } if(!printed_init) fprintf(f, "\n(No extended commands)\n"); diff --git a/app/builtin/license.c b/app/builtin/license.c index b3b6b0fa..8090c1ef 100644 --- a/app/builtin/license.c +++ b/app/builtin/license.c @@ -11,19 +11,19 @@ static int main_license(int argc, const char *argv[]) { (void)(argv); fprintf(stderr, "Note: for third-party licenses & acknowledgements, run `zsv thirdparty`\n"); - printf("\n==========================\n"); + printf("\n====================================================\n"); printf("ZSV/lib license"); printf(" "); - printf("\n==========================\n"); + printf("\n====================================================\n"); fwrite(zsv_license_text_MIT, 1, strlen(zsv_license_text_MIT), stdout); struct cli_config config; if(!config_init(&config, 0, 1, 0)) { for(struct zsv_ext *ext = config.extensions; ext; ext = ext->next) { - printf("\n==========================\n"); + printf("\n====================================================\n"); printf("License for extension '%s'", ext->id); - printf("\n==========================\n"); + printf("\n====================================================\n"); if(ext->license && *ext->license) { size_t len = strlen(ext->license); fwrite(ext->license, 1, len, stdout); diff --git a/app/ext_example/Makefile b/app/ext_example/Makefile index bb51c8b8..d8c70dcf 100644 --- a/app/ext_example/Makefile +++ b/app/ext_example/Makefile @@ -46,6 +46,9 @@ else endif CFLAGS+= ${CFLAGS_PIC} +ifeq ($(ZSV_EXTRAS),1) + CFLAGS+= -DZSV_EXTRAS +endif DEBUG=0 ifeq ($(DEBUG),0) @@ -94,6 +97,10 @@ ${INSTALLED_EXTENSION}: ${TARGET} cp -p $< $@ endif +CLI=${BUILD_DIR}/bin/cli + +RUN_CLI=ZSV_CONFIG_DIR=/tmp ${CLI} + ${BUILD_DIR}/bin/cli: (cd .. && make CC=${CC} CONFIGFILE=${CONFIGFILEPATH} DEBUG=${DEBUG} ${BUILD_DIR}/bin/cli) @@ -102,43 +109,43 @@ ${BUILD_DIR}/objs/utils/%.o: test: test-1 test-2 test-3 test-4 test-5 test-thirdparty -test-1: test-%: ${BUILD_DIR}/bin/cli ${TARGET} - @${BUILD_DIR}/bin/cli my-echo < ${THIS_LIB_BASE}/data/quoted.csv > /tmp/zsvext-$@.out +test-1: test-%: ${CLI} ${TARGET} + @${RUN_CLI} my-echo < ${THIS_LIB_BASE}/data/quoted.csv > /tmp/zsvext-$@.out @cmp /tmp/zsvext-$@.out test/expected/zsvext-$@.out && ${TEST_PASS} || ${TEST_FAIL} -test-2: test-%: ${BUILD_DIR}/bin/cli ${TARGET} - @${BUILD_DIR}/bin/cli my-echo -t < ${THIS_LIB_BASE}/data/hi.txt > /tmp/zsvext-$@.out +test-2: test-%: ${CLI} ${TARGET} + @${RUN_CLI} my-echo -t < ${THIS_LIB_BASE}/data/hi.txt > /tmp/zsvext-$@.out @cmp /tmp/zsvext-$@.out test/expected/zsvext-$@.out && ${TEST_PASS} || ${TEST_FAIL} -test-3: test-%: ${BUILD_DIR}/bin/cli ${TARGET} +test-3: test-%: ${CLI} ${TARGET} @rm -f /tmp/zsvext-$@.out - @${BUILD_DIR}/bin/cli unregister my 2>/dev/null 1>/dev/null || [ 1==1 ] - @${BUILD_DIR}/bin/cli register my 2>&1 | grep -v [.]so >> /tmp/zsvext-$@.out || [ 1==1 ] - @${BUILD_DIR}/bin/cli unregister my 2>&1 | grep -v [.]so >> /tmp/zsvext-$@.out || [ 1==1 ] - @${BUILD_DIR}/bin/cli unregister my 2>&1 | grep -v [.]so >> /tmp/zsvext-$@.out || [ 1==1 ] - @${BUILD_DIR}/bin/cli help 2>&1 | grep -v [.]so >> /tmp/zsvext-$@.out || [ 1==1 ] - @${BUILD_DIR}/bin/cli register my 2>&1 | grep -v [.]so >> /tmp/zsvext-$@.out || [ 1==1 ] - @${BUILD_DIR}/bin/cli help 2>&1 | grep -v [.]so >> /tmp/zsvext-$@.out || [ 1==1 ] + @${RUN_CLI} unregister my 2>/dev/null 1>/dev/null || [ 1==1 ] + @${RUN_CLI} register my 2>&1 | grep -v [.]so >> /tmp/zsvext-$@.out || [ 1==1 ] + @${RUN_CLI} unregister my 2>&1 | grep -v [.]so >> /tmp/zsvext-$@.out || [ 1==1 ] + @${RUN_CLI} unregister my 2>&1 | grep -v [.]so >> /tmp/zsvext-$@.out || [ 1==1 ] + @${RUN_CLI} help 2>&1 | grep -v [.]so >> /tmp/zsvext-$@.out || [ 1==1 ] + @${RUN_CLI} register my 2>&1 | grep -v [.]so >> /tmp/zsvext-$@.out || [ 1==1 ] + @${RUN_CLI} help 2>&1 | grep -v [.]so >> /tmp/zsvext-$@.out || [ 1==1 ] @cmp /tmp/zsvext-$@.out 2>&1 test/expected/zsvext-$@.out && ${TEST_PASS} || ${TEST_FAIL} -test-4: test-%: ${BUILD_DIR}/bin/cli ${TARGET} - @${BUILD_DIR}/bin/cli unregister my 2>/dev/null 1>/dev/null || [ 1==1 ] - @${BUILD_DIR}/bin/cli license > /tmp/zsvext-$@.out 2>> /tmp/zsvext-$@.err - @${BUILD_DIR}/bin/cli register my 2>/dev/null 1>/dev/null - @${BUILD_DIR}/bin/cli license >> /tmp/zsvext-$@.out 2> /tmp/zsvext-$@.err +test-4: test-%: ${CLI} ${TARGET} + @${RUN_CLI} unregister my 2>/dev/null 1>/dev/null || [ 1==1 ] + @${RUN_CLI} license > /tmp/zsvext-$@.out 2>> /tmp/zsvext-$@.err + @${RUN_CLI} register my 2>/dev/null 1>/dev/null + @${RUN_CLI} license >> /tmp/zsvext-$@.out 2> /tmp/zsvext-$@.err @cmp /tmp/zsvext-$@.out test/expected/zsvext-$@.out && ${TEST_PASS} || ${TEST_FAIL} @cmp /tmp/zsvext-$@.err test/expected/zsvext-$@.err && ${TEST_PASS} || ${TEST_FAIL} -test-5: test-%: ${BUILD_DIR}/bin/cli ${TARGET} - @${BUILD_DIR}/bin/cli my-count newline.csv 2>/dev/null > /tmp/zsvext-$@.out - @${BUILD_DIR}/bin/cli my-count -q newline.csv 2>/dev/null >> /tmp/zsvext-$@.out +test-5: test-%:${TARGET} + @${RUN_CLI} my-count newline.csv 2>/dev/null > /tmp/zsvext-$@.out + @${RUN_CLI} my-count -q newline.csv 2>/dev/null >> /tmp/zsvext-$@.out @cmp /tmp/zsvext-$@.out test/expected/zsvext-$@.out && ${TEST_PASS} || ${TEST_FAIL} -test-thirdparty: test-%: ${BUILD_DIR}/bin/cli ${TARGET} - @${BUILD_DIR}/bin/cli unregister my 2>/dev/null 1>/dev/null || [ 1==1 ] - @${BUILD_DIR}/bin/cli thirdparty > /tmp/zsvext-$@.out - @${BUILD_DIR}/bin/cli register my 2>/dev/null 1>/dev/null - @${BUILD_DIR}/bin/cli thirdparty >> /tmp/zsvext-$@.out +test-thirdparty: test-%: ${CLI} ${TARGET} + @${RUN_CLI} unregister my 2>/dev/null 1>/dev/null || [ 1==1 ] + @${RUN_CLI} thirdparty > /tmp/zsvext-$@.out + @${RUN_CLI} register my 2>/dev/null 1>/dev/null + @${RUN_CLI} thirdparty >> /tmp/zsvext-$@.out @cmp /tmp/zsvext-$@.out test/expected/zsvext-$@.out && ${TEST_PASS} || ${TEST_FAIL} clean: diff --git a/app/ext_example/configure b/app/ext_example/configure index d7ed6796..85c7a000 100755 --- a/app/ext_example/configure +++ b/app/ext_example/configure @@ -2,7 +2,7 @@ # Based on the configure script from musl libc, MIT licensed; vis, ISC licensed usage () { -cat </dev/null 2>&1 && { echo "$1" ; return 0 ; } + tr '\n' ' ' </dev/null 2>&1 && { echo "$1" ; return 0 ; } $1 EOF -printf %s\\n "$1" | sed -e "s/'/'\\\\''/g" -e "1s/^/'/" -e "\$s/\$/'/" -e "s#^'\([-[:alnum:]_,./:]*\)=\(.*\)\$#\1='\2#" + printf %s\\n "$1" | sed -e "s/'/'\\\\''/g" -e "1s/^/'/" -e "\$s/\$/'/" -e "s#^'\([-[:alnum:]_,./:]*\)=\(.*\)\$#\1='\2#" } echo () { printf "%s\n" "$*" ; } fail () { echo "$*" ; exit 1 ; } fnmatch () { eval "case \"\$2\" in $1) return 0 ;; *) return 1 ;; esac" ; } cmdexists () { type "$1" >/dev/null 2>&1 ; } -trycc () { test -z "$CC" && cmdexists "$1" && CC=$1 ; } +trycc1 () { test -z "$CC" && cmdexists "$1" && ( "$1" --version | grep -i gcc ) >/dev/null 2>/dev/null && CC=$1 ; } +trycc2 () { test -z "$CC" && cmdexists "$1" && CC=$1 ; } tryar () { test -z "$AR" && cmdexists "$1" && AR=$1 ; } tryranlib () { test -z "$RANLIB" && cmdexists "$1" && RANLIB=$1 ; } tryawk () { test -z "$AWK" && "$1" 'function works () {return 0} BEGIN{exit works()}' && AWK=$1 ; } -tryllvmprofdata () { test -z "$LLVM_PROFDATA" && cmdexists "$1" && LLVM_PROFDATA=$1 ; } +trymake() { + if test -z "$MAKE" && cmdexists "$1" ; then + v=`$1 --version | grep 'GNU Make' | sed -e 's/.*Make //'` + if [ "$v" = "" ] ; then + printf "Found but not using non-GNU make ($1)... " + else + MAKE="$1" + maj=`echo $v | cut -d. -f 1` + min=`echo $v | cut -d. -f 2` + if ! test $maj -gt 3 -o '(' $maj -eq 3 -a $min -ge 81 ')' ; then + printf "Warning: using make, but version $v < 3.81 has not been tested. " + fi + fi + fi +} stripdir () { -while eval "fnmatch '*/' \"\${$1}\"" ; do eval "$1=\${$1%/}" ; done + while eval "fnmatch '*/' \"\${$1}\"" ; do eval "$1=\${$1%/}" ; done } +NO_HAVE= trycchdr () { -printf "checking whether there is a header called %s... " "$2" -if find $CCSEARCHPATH -name "$2" | grep "/$2$" >/dev/null 2>&1 ; then -upper2=$(echo "$2" | tr a-z A-Z | tr . _) -eval "$1=\"\${$1} -DHAVE_\${upper2}\"" -eval "$1=\${$1# }" -printf "yes\n" -return 0 -else -printf "no\n" -return 1 -fi + printf "checking whether there is a header called %s... " "$2" + dn=`dirname "$2"` + if [ "$dn" != "" ]; then + dn="/$dn" + bn=`basename "$2"` + for x in $CCSEARCHPATH ; do + fnd=$fnd"$x$dn " + done + else + bn="$2" + fnd=$CCSEARCHPATH + fi + upper2=$(echo "$2" | tr a-z A-Z | tr . _ | tr / _) + if find $fnd -name "$bn" 2>/dev/null | grep "/$2$" >/dev/null 2>&1 ; then + eval "$1=\"\${$1} -DHAVE_\${upper2}\"" + eval "$1=\${$1# }" + printf "yes\n" + return 0 + else + printf "no\n" + NO_HAVE="$NO_HAVE +NO_$upper2 = 1" + return 1 + fi } tryccfn () { -printf "checking whether compiler accepts %s from %s... " "$2" "$3" -printf "%s\n" $3 | sed 's/\(.*\)/#include <\1>/' > "$tmpc" -cat >> "$tmpc" <> "$tmpc" + else + printf "checking whether compiler accepts %s from %s..." "$2" "$3" + fi + if [ "$3" != "" ]; then + printf "%s\n" $3 | sed 's/\(.*\)/#include <\1>/' >> "$tmpc" + fi + cat >> "$tmpc" </dev/null 2>&1 ; then -flag=$(echo "$2" | cut -d'(' -f1 | tr a-z A-Z) -eval "vars=\$$1" -if ! printf "%s\n" ${vars} | grep "\-DHAVE_${flag}\$" >/dev/null 2>&1; then -flag="-DHAVE_${flag}" -eval "$1=\"\${vars} \${flag}\"" -eval "$1=\${$1# }" -fi -printf "yes\n" -return 0 -else -printf "no\n" -return 1 -fi + flag=$(echo "$2" | cut -d'(' -f1 | tr a-z A-Z) + if $CC -o "$tmpo" "$tmpc" >/dev/null 2>&1 ; then + if [ "$4" != "" ] ; then + lib=$(echo "$4" | tr a-z A-Z | tr . _) + USE_LIBS="$USE_LIBS +USE_LIB_$lib = 1" + fi + + eval "vars=\$$1" + if ! printf "%s\n" ${vars} | grep "\-DHAVE_${flag}\$" >/dev/null 2>&1; then + flag="-DHAVE_${flag}" + eval "$1=\"\${vars} \${flag}\"" + eval "$1=\${$1# }" + fi + printf "yes\n" + return 0 + else + printf "no\n" + NO_HAVE="$NO_HAVE +NO_$flag = 1" + echo "------" >> $CONFIGFILE.log + echo "Failed: $CC -o $tmpo tmp.c" >> $CONFIGFILE.log + cat "$tmpc" >> $CONFIGFILE.log + echo "------" >> $CONFIGFILE.log + return 1 + fi +} + +tryccfn1 () { + rm -f "$tmpc" + printf "checking whether compiler accepts %s(%s)..." "$2" "$3" + cat >> "$tmpc" </dev/null 2>&1 ; then + flag="-DHAVE_${flag}" + have=1 + else + flag="-DNO_${flag}" + have=0 + fi + if ! printf "%s\n" ${vars} | grep "\-D${flag}\$" >/dev/null 2>&1; then + eval "$1=\"\${vars} \${flag}\"" + eval "$1=\${$1# }" + fi + if [ "$have" = "1" ]; then + printf "yes\n" + return 0 + else + printf "no\n" + return 1 + fi } tryflag () { -printf "checking whether compiler accepts %s... " "$2" -echo "int main() {return 0;}" > "$tmpc" -if $CC $CFLAGS_TRY $2 -c -o "$tmpo" "$tmpc" >/dev/null 2>&1 ; then -printf "yes\n" -eval "$1=\"\${$1} \$2\"" -eval "$1=\${$1# }" -return 0 -else -printf "no\n" -return 1 -fi + printf "checking whether compiler accepts %s... " "$2" + echo "int main() {return 0;}" > "$tmpc" + if $CC $CFLAGS_TRY $2 -c -o "$tmpo" "$tmpc" >/dev/null 2>&1 ; then + printf "yes\n" + eval "$1=\"\${$1} \$2\"" + eval "$1=\${$1# }" + return 0 + else + printf "no\n" + return 1 + fi } -tryldflag () { -printf "checking whether linker accepts %s... " "$2" -echo "int main(){return 0;}" > "$tmpc" -if $CC $LDFLAGS_TRY "$2" -o "$tmpo" "$tmpc" >/dev/null 2>&1 ; then -printf "yes\n" -eval "$1=\"\${$1} \$2\"" -eval "$1=\${$1# }" -return 0 -else -printf "no\n" -return 1 -fi +tryldflag () { # var, flag, other_arguments (optional) + printf "checking whether linker accepts %s... " "$2" + echo "int main(){return 0;}" > "$tmpc" + if $CC $LDFLAGS "$2" $3 -o "$tmpo" "$tmpc" >/dev/null 2>&1 ; then + printf "yes\n" + eval "$1=\"\${$1} \$2\"" + eval "$1=\${$1# }" + return 0 + else + printf "no\n" + return 1 + fi } trysharedldflag () { -printf "checking whether linker accepts %s... " "$2" -echo "typedef int x;" > "$tmpc" -if $CC $LDFLAGS_TRY -shared "$2" -o "$tmpo" "$tmpc" >/dev/null 2>&1 ; then -printf "yes\n" -eval "$1=\"\${$1} \$2\"" -eval "$1=\${$1# }" -return 0 -else -printf "no\n" -return 1 -fi + printf "checking whether linker accepts %s... " "$2" + echo "typedef int x;" > "$tmpc" + if $CC $LDFLAGS -shared "$2" -o "$tmpo" "$tmpc" >/dev/null 2>&1 ; then + printf "yes\n" + eval "$1=\"\${$1} \$2\"" + eval "$1=\${$1# }" + return 0 + else + printf "no\n" + return 1 + fi } # Beginning of actual script @@ -150,60 +230,92 @@ CFLAGS_AUTO= CFLAGS_TRY= LDFLAGS_AUTO= LDFLAGS_TRY= -CONFIGFILE=config.mk +CONFIGFILE=$CONFIGFILE +if [ "$CONFIGFILE" = "" ]; then + CONFIGFILE=config.mk +fi + +# check prefixes first, since others may be derived from it unless overridden PREFIX=$PREFIX -if [ "$PREFIX" == "" ]; then +for arg ; do + case "$arg" in + --prefix=*) PREFIX=${arg#*=} ;; + esac +done +if [ "$PREFIX" = "" ]; then PREFIX=/usr/local +elif [ "$PREFIX" = "${PREFIX#/}" ]; then + PREFIX="`pwd`/$PREFIX" fi -EXEC_PREFIX='$(PREFIX)' -BINDIR='$(EXEC_PREFIX)/bin' -SHAREDIR='$(PREFIX)/share' + +EXEC_PREFIX=$EXEC_PREFIX +for arg ; do + case "$arg" in + --exec-prefix=*) EXEC_PREFIX=${arg#*=} ;; + esac +done +if [ "$EXEC_PREFIX" = "" ]; then + EXEC_PREFIX=$PREFIX +elif [ "$EXEC_PREFIX" = "${EXEC_PREFIX#/}" ]; then + EXEC_PREFIX="`pwd`/$EXEC_PREFIX" +fi + DOCDIR='$(PREFIX)/share/doc' MANDIR='$(PREFIX)/share/man' +MINIMAL=no + help=yes usesmalllut=no usedebugstderr=no -useclock=no usepie=auto usepic=auto +usetermcap=auto +usejq=auto for arg ; do -case "$arg" in ---help|-h) usage ;; ---host=*) host=${arg#*=} ;; ---config-file=*) CONFIGFILE=${arg#*=} ;; ---prefix=*) PREFIX=${arg#*=} ;; ---exec-prefix=*) EXEC_PREFIX=${arg#*=} ;; ---bindir=*) BINDIR=${arg#*=} ;; ---sharedir=*) SHAREDIR=${arg#*=} ;; ---docdir=*) DOCDIR=${arg#*=} ;; ---mandir=*) MANDIR=${arg#*=} ;; ---enable-small-lut|--enable-small-lut=yes) usesmalllut=yes ;; ---disable-small-lut|--enable-small-lut=no) usesmalllut=no ;; ---enable-debug-stderr|--enable-debug-stderr=yes) usedebugstderr=yes ;; ---disable-debug-stderr|--enable-debug-stderr=no) usedebugstderr=no ;; ---enable-clock|--enable-clock=yes) useclock=yes ;; ---disable-clock|--enable-clock=no) useclock=no ;; ---enable-pie|--enable-pie=yes) usepie=yes ;; ---enable-pie=auto) usepie=auto ;; ---disable-pie|--enable-pie=no) usepie=no ;; ---enable-pic|enable-pic=yes) usepic=yes ;; ---enable-pic=auto) usepic=auto ;; ---disable-pic|--enable-pic=no) usepic=no ;; ---enable-*|--disable-*|--with-*|--without-*|--*dir=*|--build=*) ;; --* ) echo "$0: unknown option $arg" ;; -CC=*) CC=${arg#*=} ;; -CFLAGS=*) CFLAGS=${arg#*=} ;; -CPPFLAGS=*) CPPFLAGS=${arg#*=} ;; -LDFLAGS=*) LDFLAGS=${arg#*=} ;; -*=*) ;; -*) ;; -esac + case "$arg" in + --help|-h) usage ;; + --host=*) host=${arg#*=} ;; + --config-file=*) CONFIGFILE=${arg#*=} ;; + CONFIGFILE=*) CONFIGFILE=${arg#*=} ;; + --prefix=*) ;; # already handled + --exec-prefix=*) ;; # already handled EXEC_PREFIX=${arg#*=} ;; + --docdir=*) DOCDIR=${arg#*=} ;; + --mandir=*) MANDIR=${arg#*=} ;; + --minimal|--minimal=yes) MINIMAL=yes;; + --try-avx512|--try-avx512=yes) TRY_AVX512=yes;; + --enable-small-lut|--enable-small-lut=yes) usesmalllut=yes ;; + --disable-small-lut|--enable-small-lut=no) usesmalllut=no ;; + --enable-debug-stderr|--enable-debug-stderr=yes) usedebugstderr=yes ;; + --disable-debug-stderr|--enable-debug-stderr=no) usedebugstderr=no ;; + --enable-pie|--enable-pie=yes) usepie=yes ;; + --enable-pie=auto) usepie=auto ;; + --disable-pie|--enable-pie=no) usepie=no ;; + --enable-pic|enable-pic=yes) usepic=yes ;; + --enable-termcap|--enable-termcap=yes) usetermcap=yes ;; + --enable-termcap=auto) usetermcap=auto ;; + --disable-termcap|--enable-termcap=no) usetermcap=no ;; + --enable-jq|--enable-jq=yes) usejq=yes ;; + --enable-jq=auto) usejq=auto ;; + --disable-jq|--enable-jq=no) usejq=no ;; + --enable-pic=auto) usepic=auto ;; + --disable-pic|--enable-pic=no) usepic=no ;; + --enable-*|--disable-*|--with-*|--without-*|--*dir=*|--build=*) ;; + -* ) echo "$0: unknown option $arg" ;; + CC=*) CC=${arg#*=} ;; + CFLAGS=*) CFLAGS=${arg#*=} ;; + CPPFLAGS=*) CPPFLAGS=${arg#*=} ;; + LDFLAGS=*) LDFLAGS=${arg#*=} ;; + *=*) ;; + *) ;; + esac done -for i in PREFIX EXEC_PREFIX BINDIR SHAREDIR DOCDIR MANDIR ; do -stripdir $i +echo "config will be saved to $CONFIGFILE" + +for i in PREFIX EXEC_PREFIX DOCDIR MANDIR ; do + stripdir $i done # @@ -212,10 +324,10 @@ done i=0 set -C while : ; do i=$(($i+1)) -tmpc="./conf$$-$PPID-$i.c" -tmpo="./conf$$-$PPID-$i.o" -2>|/dev/null > "$tmpc" && break -test "$i" -gt 50 && fail "$0: cannot create temporary file $tmpc" + tmpc="./conf$$-$PPID-$i.c" + tmpo="./conf$$-$PPID-$i.o" + 2>|/dev/null > "$tmpc" && break + test "$i" -gt 50 && fail "$0: cannot create temporary file $tmpc" done set +C trap 'rm -f "$tmpc" "$tmpo"' EXIT QUIT TERM HUP @@ -229,20 +341,29 @@ for a in awk gawk mawk nawk; do tryawk "$a"; done printf "%s\n" "$AWK" test -n "$AWK" || fail "$0: cannot find an AWK tool" +# +# Find a MAKE tool to use# + +printf "checking for MAKE tool... " +for a in make gmake ; do trymake "$a"; done +printf "%s\n" "$MAKE" +test -n "$MAKE" || fail "$0: cannot find a MAKE tool" + # # Find a C compiler to use # printf "checking for C compiler... " -for c in cc gcc clang; do trycc "$c"; done +for c in cc gcc gcc-11 gcc-10 gcc-9 clang; do trycc1 "$c"; done +for c in cc gcc gcc-11 gcc-10 gcc-8 clang; do trycc2 "$c"; done printf "%s\n" "$CC" test -n "$CC" || fail "$0: cannot find a C compiler" printf "checking whether C compiler works... " echo "typedef int x;" > "$tmpc" if output=$($CC $CPPFLAGS $CFLAGS -c -o "$tmpo" "$tmpc" 2>&1) ; then -printf "yes\n" + printf "yes\n" else -fail "no; compiler output follows:\n%s\n" "$output" + fail "no; compiler output follows:\n%s\n" "$output" fi # @@ -252,24 +373,13 @@ CCSEARCHPATH=$(echo | ${CC} -E -Wp,-v - 2>&1 | ${AWK} '/ \//{print substr($0,2); # # Check if it is clang, and the llvm tools instead -# TODO: PROFDATA triggers specific pgo behaviour -# in the Makefile for clang or gcc... -# This isn't the most elegant solution right -# now... -# compiler=$(${CC} -v 2>&1 | ${AWK} '/ +version +/{for(i=1;i<=NF;i++){if($i=="version"){printf("%s\n",(last=="LLVM")?"clang":last);exit 0;}last=$i;}}') if test "$compiler" = "clang"; then -arlist="$CC-llvm-ar $host-llvm-ar $CC-ar $host-ar llvm-ar ar" -ranliblist="$CC-llvm-ranlib $host-llvm-ranlib $CC-ranlib $host-ranlib llvm-ranlib ranlib" -PROFDATA=zsv.profdata -#printf "checking for llvm-profdata... " -#tryllvmprofdata llvm-profdata -#printf "%s\n" "$LLVM_PROFDATA" -#test -n "$LLVM_PROFDATA" || fail "$0: cannot find llvm-profdata" + arlist="$CC-llvm-ar $host-llvm-ar $CC-ar $host-ar llvm-ar ar" + ranliblist="$CC-llvm-ranlib $host-llvm-ranlib $CC-ranlib $host-ranlib llvm-ranlib ranlib" else -arlist="$CC-ar $host-$compiler-ar $host-ar $compiler-ar ar" -ranliblist="$CC-ranlib $host-$compiler-ranlib $host-ranlib $compiler-ranlib $compiler-ranlib ranlib" -PROFDATA=zsv.profile + arlist="$CC-ar $host-$compiler-ar $host-ar $compiler-ar ar" + ranliblist="$CC-ranlib $host-$compiler-ranlib $host-ranlib $compiler-ranlib $compiler-ranlib ranlib" fi # @@ -292,6 +402,13 @@ printf 'checking host system type... ' test -n "$host" || host=$($CC -dumpmachine 2>/dev/null) || fail "could not determine host" printf '%s\n' "$host" +# start the log +cat >$CONFIGFILE.log <<_ACEOF +Config log. Invocation command line was + $ $0 $@ + +_ACEOF + # # Figure out options to force errors on unknown flags. # @@ -303,16 +420,15 @@ tryldflag LDFLAGS_TRY -Werror=unused-command-line-argument tryldflag LDFLAGS_TRY -Werror=ignored-optimization-argument CFLAGS_STD="-std=gnu11 -D_POSIX_C_SOURCE=200809L -U_XOPEN_SOURCE -D_XOPEN_SOURCE=700" -CFLAGS_OPT="-DNDEBUG" +# CFLAGS_OPT="-DNDEBUG" MINGW=0 case "$host" in -*-*freebsd*) CFLAGS_STD="$CFLAGS_STD -D_BSD_SOURCE -D__BSD_VISIBLE=1" ;; -*-*netbsd*) CFLAGS_STD="$CFLAGS_STD -D_NETBSD_SOURCE" ;; -*-*bsd*) CFLAGS_STD="$CFLAGS_STD -D_BSD_SOURCE" ;; -*-*darwin*) CFLAGS_STD="$CFLAGS_STD -D_DARWIN_C_SOURCE" ;; -*-*mingw32|*-*msys*|*-windows-gnu) + *-*freebsd*) CFLAGS_STD="$CFLAGS_STD -D_BSD_SOURCE -D__BSD_VISIBLE=1" ;; + *-*netbsd*) CFLAGS_STD="$CFLAGS_STD -D_NETBSD_SOURCE" ;; + *-*bsd*) CFLAGS_STD="$CFLAGS_STD -D_BSD_SOURCE" ;; + *-*darwin*) CFLAGS_STD="$CFLAGS_STD -D_DARWIN_C_SOURCE" ;; + *-*mingw32|*-*msys*|*-windows-gnu) CFLAGS_STD="$CFLAGS_STD -D__USE_MINGW_ANSI_STDIO" - #LDFLAGS_AUTO="-Wl,--export-all-symbols" MINGW=1 usepie=no usepic=no @@ -320,25 +436,32 @@ case "$host" in esac if test "$usepie" = "auto" ; then -usepie=yes + usepie=yes fi if test "$usepic" = "auto" ; then -usepic=yes + usepic=yes fi +tryflag CFLAGS_VECTORIZE -fvectorize +tryflag CFLAGS_VECTORIZE -ftree-vectorize +tryflag CFLAGS_VECTORIZE_OPTIMIZED -fopt-info-vec-optimized +tryflag CFLAGS_VECTORIZE_MISSED -fopt-info-vec-missed +tryflag CFLAGS_VECTORIZE_ALL -fopt-info-vec-all +tryflag CFLAGS_OPENMP -fopenmp + if test "$usepie" = "yes" ; then -tryflag CFLAGS_PIE -fpie || tryflag CFLAGS_PIE -fPIE + tryflag CFLAGS_PIE -fpie || tryflag CFLAGS_PIE -fPIE fi if test "$usepic" = "yes" ; then -tryflag CFLAGS_PIC -fpic || tryflag CFLAGS_PIC -fPIC + tryflag CFLAGS_PIC -fpic || tryflag CFLAGS_PIC -fPIC fi test "$usepie" = "yes" && tryldflag LDFLAGS_PIE -pie if test "$usepic" = "yes" ; then -trysharedldflag LDFLAGS_PIC -fpic || trysharedldflag LDFLAGS_PIC -fPIC + trysharedldflag LDFLAGS_PIC -fpic || trysharedldflag LDFLAGS_PIC -fPIC fi test "$usepie" = "no" && tryflag CFLAGS_PIE -fno-pie @@ -346,73 +469,105 @@ test "$usepic" = "no" && tryflag CFLAGS_PIC -fno-pic test "$usepie" = "no" && tryldflag LDFLAGS_PIE -no-pie test "$usepic" = "no" && trysharedldflag LDFLAGS_PIC -fno-pic -if test $MINGW -eq 0 ; then -LDFLAGS_STD="-lc" -tryflag CFLAGS_AUTO -fstack-protector-all -case "$CFLAGS_AUTO" in -*-fstack-protector*) CFLAGS_AUTO="-D_FORTIFY_SOURCE=2"; ;; -esac -fi +#if test $MINGW -eq 0 ; then +#LDFLAGS_STD="-lc" +#tryflag CFLAGS_AUTO -fstack-protector-all +#case "$CFLAGS_AUTO" in +#*-fstack-protector*) CFLAGS_AUTO="-D_FORTIFY_SOURCE=2"; ;; +#esac +#fi tryflag CFLAGS -pipe # Try flags to optimize speed tryflag CFLAGS -ffunction-sections tryflag CFLAGS -fdata-sections -tryflag CFLAGS_OPT -O3 -tryflag CFLAGS_OPT -march=native tryflag CFLAGS_AVX2 -mavx2 tryflag CFLAGS_CLMUL -mvpclmulqdq -tryflag CFLAGS_OPT -flto -tryflag CFLAGS_OPT -fwhole-program +tryflag CFLAGS_LTO -flto tryflag CFLAGS_OPT -fvisibility=hidden tryldflag LDFLAGS_AUTO -Wl,--gc-sections -tryldflag LDFLAGS_OPT -flto +tryldflag LDFLAGS_OPT_LTO -flto tryldflag LDFLAGS_OPT -fwhole-program -tryldflag LDFLAGS_OPT -O3 tryldflag LDFLAGS_OPT -march=native +tryldflag LDFLAGS_OPT -ldl # Try hardening flags if test "$usepie" = "yes" ; then -case "$LDFLAGS_PIE" in -*pie*) - tryldflag LDFLAGS_PIE -Wl,-z,now - tryldflag LDFLAGS_PIE -Wl,-z,relro - ;; -esac + case "$LDFLAGS_PIE" in + *pie*) + tryldflag LDFLAGS_PIE -Wl,-z,now + tryldflag LDFLAGS_PIE -Wl,-z,relro + ;; + esac fi if test "$usepic" = "yes" ; then -case "$LDFLAGS_PIC" in -*pic*) - tryldflag LDFLAGS_PIC -Wl,-z,now - tryldflag LDFLAGS_PIC -Wl,-z,relro - ;; -esac + case "$LDFLAGS_PIC" in + *pic*) + tryldflag LDFLAGS_PIC -Wl,-z,now + tryldflag LDFLAGS_PIC -Wl,-z,relro + ;; + esac fi # Check function availability -tryccfn CFLAGS_AUTO "_alloca(4)" "malloc.h" -trycchdr CFLAGS_AUTO "alloca.h" -tryccfn CFLAGS_AUTO "alloca(4)" "stdlib.h" -tryccfn CFLAGS_AUTO "alloca(4)" "alloca.h" +if [ "$TRY_AVX512" = "yes" ]; then + printf "checking whether avx512 instructions are available..." + HAVE_AVX512= + tryccfn CFLAGS_AVX_512 "_mm512_movepi8_mask" "immintrin.h" && trycchdr CFLAGS_AVX_512 "immintrin.h" && ( tryccfn CFLAGS_AVX_512 "_blsr_u64" "immintrin.h" || tryccfn CFLAGS_AVX_512 "__blsr_u64" "immintrin.h" ) && tryflag CFLAGS_AVX_512 "-mbmi" && tryflag CFLAGS_AVX_512 "-mavx512bw" && CFLAGS_AVX_512="-mbmi -mavx512bw -DHAVE_AVX512=1" && HAVE_AVX512=1 + if [ "$HAVE_AVX512" = "1" ]; then + echo "yes" + else + echo "no" + echo "WARNING: --try-avx512 option enabled, but no avx512 instruction set available" + fi +fi + +tryccfn CFLAGS_AVX_256 "_mm256_movemask_epi8" "immintrin.h" && trycchdr CFLAGS_AVX_256 "immintrin.h" && ( tryccfn CFLAGS_AVX_256 "_blsr_u32" "immintrin.h" || tryccfn CFLAGS_AVX_256 "__blsr_u32" "immintrin.h" ) && CFLAGS_AVX_256="-DHAVE_AVX256=1" && HAVE_AVX256=1 + +tryccfn CFLAGS_AUTO "memmem" "string.h" + +if [ "$usetermcap" = "yes" ] || [ "$usetermcap" = "auto" ] ; then + tryccfn TERMCAP_H "tgetent" "termcap.h" && tryldflag LDFLAGS_TERMCAP -ltermcap && tryccfn CFLAGS_AUTO "tgetent" "termcap.h" termcap || \ + if test "$usetermcap" = "yes"; then + echo "Error: --enable-termcap specified, but not found" + exit 1 + fi +fi + +if [ "$usejq" = "yes" ] || [ "$usejq" = "auto" ] ; then + tryldflag LDFLAGS_JQ -ljq -L${PREFIX}/lib || \ + if test "$usejq" = "yes"; then + echo "Error: --enable-jq specified, but not found" + exit 1 + fi + if ! [ "$LDFLAGS_JQ" = "" ] ; then + tryldflag LDFLAGS_JQ -lm + tryldflag LDFLAGS_JQ -lshlwapi + tryldflag LDFLAGS_JQ -pthread + fi +fi + +tryccfn CFLAGS_AUTO "arc4random_uniform" "stdlib.h" || tryccfn CFLAGS_AUTO "rand_s" "stdlib.h" "" "#define _CRT_RAND_S" +tryccfn1 CFLAGS_AUTO "__builtin_expect" "0,0" +tryccfn1 CFLAGS_AUTO "__builtin_expect_with_probability" "0,0,0.5" # Optional features if test "$usesmalllut" = "yes" ; then - USE_SMALL_LUT=1 + USE_SMALL_LUT=1 else - USE_SMALL_LUT=0 + USE_SMALL_LUT=0 fi if test "$usedebugstderr" = "yes" ; then - USE_DEBUG_STDERR=1 + USE_DEBUG_STDERR=1 else - USE_DEBUG_STDERR=0 + USE_DEBUG_STDERR=0 fi -if test "$useclock" = "yes" ; then - USE_CLOCK=1 -else - USE_CLOCK=0 +ZSV_EXTRAS= +if test "$MINIMAL" = "no" ; then + ZSV_EXTRAS=1 fi printf "creating $CONFIGFILE... " @@ -428,25 +583,36 @@ cat << EOF # Any changes made here will be lost if configure is re-run PREFIX = $PREFIX EXEC_PREFIX = $EXEC_PREFIX -BINDIR = $BINDIR +BINDIR = $EXEC_PREFIX/bin +LIBDIR = $EXEC_PREFIX/lib +INCLUDEDIR = $EXEC_PREFIX/include + DOCPREFIX = $DOCDIR MANPREFIX = $MANDIR -SHAREPREFIX = $SHAREDIR CC = $CC AWK = $AWK +MAKE = $MAKE + AR = $AR RANLIB = $RANLIB -LLVM_PROFDATA = $LLVM_PROFDATA -PROFDATA = $PROFDATA CFLAGS = $CFLAGS LDFLAGS = $LDFLAGS CFLAGS_STD = $CFLAGS_STD LDFLAGS_STD = $LDFLAGS_STD CFLAGS_OPT = $CFLAGS_OPT LDFLAGS_OPT = $LDFLAGS_OPT +LDFLAGS_TERMCAP = $LDFLAGS_TERMCAP +LDFLAGS_JQ = $LDFLAGS_JQ CFLAGS_AUTO = $CFLAGS_AUTO +CFLAGS_LTO = $CFLAGS_LTO LDFLAGS_AUTO = $LDFLAGS_AUTO CFLAGS_AVX2 = $CFLAGS_AVX2 + +HAVE_AVX512=$HAVE_AVX512 +CFLAGS_AVX_512=$CFLAGS_AVX_512 +HAVE_AVX256=$HAVE_AVX256 +CFLAGS_AVX_256=$CFLAGS_AVX_256 + CFLAGS_CLMUL = $CFLAGS_CLMUL CFLAGS_DEBUG = -U_FORTIFY_SOURCE -UNDEBUG -O0 -g3 -ggdb -Wall -Wextra -Wno-missing-field-initializers -Wno-unused-parameter LDFLAGS_DEBUG = -U_FORTIFY_SOURCE -UNDEBUG -O0 -g3 -ggdb @@ -456,10 +622,71 @@ CFLAGS_PIE = $CFLAGS_PIE LDFLAGS_PIE = $LDFLAGS_PIE USE_SMALL_LUT = $USE_SMALL_LUT USE_DEBUG_STDERR = $USE_DEBUG_STDERR -USE_CLOCK = $USE_CLOCK +CFLAGS_VECTORIZE = $CFLAGS_VECTORIZE +CFLAGS_VECTORIZE_OPTIMIZED = $CFLAGS_VECTORIZE_OPTIMIZED +CFLAGS_VECTORIZE_MISSED = $CFLAGS_VECTORIZE_MISSED +CFLAGS_VECTORIZE_ALL = $CFLAGS_VECTORIZE_ALL +CFLAGS_OPENMP = $CFLAGS_OPENMP + +ZSV_EXTRAS = $ZSV_EXTRAS + +$NO_HAVE +$USE_LIBS + EOF exec 1>&3 3>&- printf "done\n" +echo "" +echo "****************************************************************" +echo "* zsv configuration *" +echo "****************************************************************" +if [ "$LDFLAGS_JQ" = "" ]; then + echo "* - libjq (-ljq): no. \`jq\` command will be disabled *" +else + echo "* - libjq: yes *" +fi + +if [ "$LDFLAGS_TERMCAP" = "" ]; then + echo "* - termcap: no. \`pretty\` will use default width assumption *" +else + echo "* - termcap: yes *" +fi + +if [ "$HAVE_AVX512" = "1" ]; then + echo "* - using 512-bit AVX instruction set" +elif [ "$HAVE_AVX256" = "1" ]; then + echo "* - using 256-bit AVX instruction set" +else + echo "* - using 128-bit vector size" +fi + +echo "****************************************************************" + +if ! [ "$MAKE" = "" ]; then + echo "" + echo "To build and install, run" + echo "$MAKE install" > `pwd`/install.sh 2>/dev/null && chmod 755 `pwd`/install.sh 2>/dev/null && + printf " ./install.sh\nor\n" + echo " $MAKE install" + echo "" + echo "Other common commands:" + echo " $MAKE # print available make commands" + echo " (cd src && $MAKE install) # install library" + echo " (cd app && $MAKE install) # install library and zsv CLI" + echo " $MAKE uninstall # uninstall library and zsv CLI" + echo " $MAKE clean # remove build artifacts" + echo "" +fi + +if ! "$CC" --version | grep -i gcc >/dev/null && ! "$CC" --version | grep -i clang >/dev/null ; then + echo "*********************** WARNING!! ***********************" + echo "* Non-gcc/clang compiler untested; use at your own risk *" + echo "* consider using gcc or clang instead e.g.: *" + echo "* ./configure CC=gcc-11 *" + echo "*********************************************************" + echo "" +fi + exit 0 diff --git a/app/ext_example/my_extension.c b/app/ext_example/my_extension.c index f3e87c8f..a3673f5d 100644 --- a/app/ext_example/my_extension.c +++ b/app/ext_example/my_extension.c @@ -61,7 +61,7 @@ static struct zsv_ext_callbacks zsv_cb; * but with an additional preceding zsv_execution_context parameter. * Here, we just declare the functions; we fully define them further below */ -static enum zsv_ext_status count_main(zsv_execution_context ctx, int argc, const char *argv[]); +enum zsv_ext_status count_main(zsv_execution_context ctx, int argc, const char *argv[]); static enum zsv_ext_status echo_main(zsv_execution_context ctx, int argc, const char *argv[]); /** @@ -235,7 +235,7 @@ static const char *count_help = "usage: count [-h,--help] [filename]\n" ; -static enum zsv_ext_status count_main(zsv_execution_context ctx, int argc, const char *argv[]) { +enum zsv_ext_status count_main(zsv_execution_context ctx, int argc, const char *argv[]) { /* help */ if(argc > 1 && (!strcmp(argv[1], "-h") || !strcmp(argv[1], "--help"))) { printf("%s", count_help); diff --git a/app/ext_example/newline.csv b/app/ext_example/newline.csv new file mode 100644 index 00000000..2b287f21 --- /dev/null +++ b/app/ext_example/newline.csv @@ -0,0 +1,2 @@ +"hi +there" \ No newline at end of file diff --git a/app/ext_example/test/expected/zsvext-test-3.out b/app/ext_example/test/expected/zsvext-test-3.out index c779e18c..a3e075d3 100644 --- a/app/ext_example/test/expected/zsvext-test-3.out +++ b/app/ext_example/test/expected/zsvext-test-3.out @@ -7,18 +7,23 @@ zsv: streaming csv processor Usage: zsv version: display version info (and if applicable, extension info) - zsv register []: register an extension + zsv (un)register []: (un)register an extension + Registration info is saved in zsv.ini located in a directory determined as: + ZSV_CONFIG_DIR environment variable value, if set + otherwise, /usr/local/etc zsv help [] - zsv license [] zsv : run a command on data (see below for details) zsv - : invoke command 'cmd' of extension 'id' + zsv thirdparty: view third-party licenses & acknowledgements + zsv license [] Options common to all commands: - -c,--max-column-count: set the maximum number of columns parsed per row. defaults to 1024 - -r,--max-row-size: set the minimum supported maximum row size. defaults to 128k - -B,--buff-size: set internal buffer size. defaults to 256k + -L,--limit-rows : limit processing to the given number of rows (including any header row(s)) + -c,--max-column-count : set the maximum number of columns parsed per row. defaults to 1024 + -r,--max-row-size : set the minimum supported maximum row size. defaults to 64k + -B,--buff-size : set internal buffer size. defaults to 256k -t,--tab-delim: set column delimiter to tab - -O,--other-delim: set column delimiter to other column + -O,--other-delim : set column delimiter to specified character -q,--no-quote: turn off quote handling -v,--verbose: verbose output @@ -30,10 +35,12 @@ Commands: pretty: pretty print for console display flatten: flatten a table consisting of N groups of data, each with 1 or more rows in the table, into a table of N rows - 2json: convert to json + 2json: convert CSV or sqlite3 db table to json + 2db: convert json to sqlite3 db 2tsv : convert to tab-delimited text serialize: convert into 3-column format (id, column name, cell value) stack: stack tables vertically, aligning columns with common names + jq: run a jq filter on json input (No extended commands) Extension my registered @@ -43,18 +50,23 @@ zsv: streaming csv processor Usage: zsv version: display version info (and if applicable, extension info) - zsv register []: register an extension + zsv (un)register []: (un)register an extension + Registration info is saved in zsv.ini located in a directory determined as: + ZSV_CONFIG_DIR environment variable value, if set + otherwise, /usr/local/etc zsv help [] - zsv license [] zsv : run a command on data (see below for details) zsv - : invoke command 'cmd' of extension 'id' + zsv thirdparty: view third-party licenses & acknowledgements + zsv license [] Options common to all commands: - -c,--max-column-count: set the maximum number of columns parsed per row. defaults to 1024 - -r,--max-row-size: set the minimum supported maximum row size. defaults to 128k - -B,--buff-size: set internal buffer size. defaults to 256k + -L,--limit-rows : limit processing to the given number of rows (including any header row(s)) + -c,--max-column-count : set the maximum number of columns parsed per row. defaults to 1024 + -r,--max-row-size : set the minimum supported maximum row size. defaults to 64k + -B,--buff-size : set internal buffer size. defaults to 256k -t,--tab-delim: set column delimiter to tab - -O,--other-delim: set column delimiter to other column + -O,--other-delim : set column delimiter to specified character -q,--no-quote: turn off quote handling -v,--verbose: verbose output @@ -66,10 +78,12 @@ Commands: pretty: pretty print for console display flatten: flatten a table consisting of N groups of data, each with 1 or more rows in the table, into a table of N rows - 2json: convert to json + 2json: convert CSV or sqlite3 db table to json + 2db: convert json to sqlite3 db 2tsv : convert to tab-delimited text serialize: convert into 3-column format (id, column name, cell value) stack: stack tables vertically, aligning columns with common names + jq: run a jq filter on json input Extended commands: Extension 'my': Sample zsv extension diff --git a/app/ext_example/test/expected/zsvext-test-4.out b/app/ext_example/test/expected/zsvext-test-4.out index a6a5f2db..3e91379d 100644 --- a/app/ext_example/test/expected/zsvext-test-4.out +++ b/app/ext_example/test/expected/zsvext-test-4.out @@ -1,7 +1,7 @@ -========================== +==================================================== ZSV/lib license -========================== +==================================================== Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to @@ -20,9 +20,9 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -========================== +==================================================== ZSV/lib license -========================== +==================================================== Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to @@ -41,7 +41,7 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -========================== +==================================================== License for extension 'my' -========================== +==================================================== Unlicense. See https://github.com/spdx/license-list-data/blob/master/text/Unlicense.txt diff --git a/app/ext_example/test/expected/zsvext-test-5.out b/app/ext_example/test/expected/zsvext-test-5.out new file mode 100644 index 00000000..8f9ca4d6 --- /dev/null +++ b/app/ext_example/test/expected/zsvext-test-5.out @@ -0,0 +1,2 @@ +Rows: 0 +Rows: 1 diff --git a/app/ext_example/test/expected/zsvext-test-thirdparty.out b/app/ext_example/test/expected/zsvext-test-thirdparty.out index 6815a818..89b35a18 100644 --- a/app/ext_example/test/expected/zsvext-test-thirdparty.out +++ b/app/ext_example/test/expected/zsvext-test-thirdparty.out @@ -1,13 +1,13 @@ Third-party licenses and acknowldgements ========================== -ZSV/lib +ZSV/lib third-party dependencies ========================== See https://github.com/liquidaty/zsv/blob/main/misc/THIRDPARTY.md Third-party licenses and acknowldgements ========================== -ZSV/lib +ZSV/lib third-party dependencies ========================== See https://github.com/liquidaty/zsv/blob/main/misc/THIRDPARTY.md diff --git a/app/utils/dirs.c b/app/utils/dirs.c index b0a13be2..2957e9de 100644 --- a/app/utils/dirs.c +++ b/app/utils/dirs.c @@ -37,15 +37,23 @@ static size_t chop_slash(char* buff, size_t len) { */ size_t get_config_dir(char* buff, size_t buffsize, const char *prefix) { #if defined(_WIN32) - const char *env = prefix; - const char *env_val = getenv(env); + const char *env_val = getenv("ZSV_CONFIG_DIR"); + // if(!(env_val && *env_val)) + // env_val = getenv(prefix); if(!(env_val && *env_val)) env_val = getenv("LOCALAPPDATA"); + if(!(env_val && *env_val)) + env_val = "C:\\temp"; int written = snprintf(buff, buffsize, "%s", env_val); #elif defined(__EMSCRIPTEN__) int written = snprintf(buff, buffsize, "/tmp"); #else - int written = snprintf(buff, buffsize, "%s/etc", prefix ? prefix : ""); + int written; + const char *env_val = getenv("ZSV_CONFIG_DIR"); + if(env_val && *env_val) + written = snprintf(buff, buffsize, "%s", env_val); + else + written = snprintf(buff, buffsize, "%s/etc", prefix ? prefix : ""); #endif if(written > 0 && ((size_t)written) < buffsize) return chop_slash(buff, written); diff --git a/scripts/ci-build.sh b/scripts/ci-build.sh index ea14d8b6..d2977f96 100755 --- a/scripts/ci-build.sh +++ b/scripts/ci-build.sh @@ -53,10 +53,14 @@ if [ "$RUN_TESTS" = true ]; then rm -rf build "$PREFIX" "$MAKE" test echo "[INF] Tests completed successfully!" + + echo "[INF] Configuring example extension and running example extension tests" + (cd app/ext_example && "$MAKE" CONFIGFILE=../../config.mk test) + echo "[INF] Tests completed successfully!" fi echo "[INF] Building" -rm -rf build "$PREFIX" +rm -rf build "$PREFIX" /usr/local/etc/zsv.ini "$MAKE" install tree -h "$PREFIX" echo "[INF] Built successfully!" diff --git a/src/vector_delim.c b/src/vector_delim.c index f1fc6402..9b8d9a48 100644 --- a/src/vector_delim.c +++ b/src/vector_delim.c @@ -45,7 +45,6 @@ static inline int vec_delims(const unsigned char *s, size_t n, unsigned total_bytes = 0; for(unsigned i = 0; i < j; i++) { - // memcpy(&str_simd, pSrc1 + i, VECTOR_BYTES); -- will bus error on clang+BSD memcpy(&str_simd, s + i*sizeof(str_simd), sizeof(str_simd)); zsv_uc_vector vtmp = str_simd == *char_match1; vtmp += (str_simd == *char_match2); diff --git a/src/zsv_internal.c b/src/zsv_internal.c index 53de2f25..ea074d77 100644 --- a/src/zsv_internal.c +++ b/src/zsv_internal.c @@ -158,7 +158,7 @@ __attribute__((always_inline)) static inline enum zsv_status row_dl(struct zsv_s scanner->row.allocated + scanner->row.overflow, scanner->row.allocated); scanner->row.overflow = 0; } - if(scanner->opts.row) + if(LIKELY(scanner->opts.row)) scanner->opts.row(scanner->opts.ctx); # ifdef ZSV_EXTRAS scanner->progress.cum_row_count++; @@ -198,8 +198,8 @@ __attribute__((always_inline)) static inline enum zsv_status row_dl(struct zsv_s if(VERY_UNLIKELY(scanner->abort)) return zsv_status_cancelled; scanner->have_cell = 0; - if(scanner->row.used) - scanner->row.used = 0; +// if(scanner->row.used) + scanner->row.used = 0; return zsv_status_ok; } @@ -208,20 +208,29 @@ static inline enum zsv_status cell_and_row_dl(struct zsv_scanner *scanner, unsig return row_dl(scanner); } -#if defined(HAVE_AVX512) -# define VECTOR_BYTES 64 -# define VECTOR_SIZE 512 -# define zsv_mask_t uint64_t -# define movemask_pseudo(x) _mm512_movepi8_mask((__m512i)x) -# define NEXT_BIT __builtin_ffsl -#elif defined(HAVE_AVX256) -# define VECTOR_BYTES 32 -# define VECTOR_SIZE 256 -# define zsv_mask_t uint32_t -# define movemask_pseudo(x) _mm256_movemask_epi8((__m256i)x) -# define NEXT_BIT __builtin_ffs -#else -# define ZSV_NO_AVX +#ifndef ZSV_NO_AVX +# if !defined(__AVX2__) +# define ZSV_NO_AVX +# elif defined(HAVE_AVX512) +# ifndef __AVX512BW__ +# error AVX512 requested, but __AVX512BW__ macro not defined +# else +# define VECTOR_BYTES 64 +# define zsv_mask_t uint64_t +# define movemask_pseudo(x) _mm512_movepi8_mask((__m512i)x) +# define NEXT_BIT __builtin_ffsl +# endif +# elif defined(HAVE_AVX256) +# define VECTOR_BYTES 32 +# define zsv_mask_t uint32_t +# define movemask_pseudo(x) _mm256_movemask_epi8((__m256i)x) +# define NEXT_BIT __builtin_ffs +# else +# define ZSV_NO_AVX +# endif +#endif // ndef ZSV_NO_AVX + +#if defined(ZSV_NO_AVX) # define zsv_mask_t uint16_t # define VECTOR_BYTES 16 # define NEXT_BIT __builtin_ffs @@ -258,7 +267,7 @@ static inline zsv_mask_t movemask_pseudo(zsv_uc_vector v) { vst1q_lane_u8((uint8_t*)&mask + 1, (uint8x16_t)imask, 8); return mask; #else - + // to do: see https://github.com/WebAssembly/simd/issues/131 for wasm zsv_mask_t mask = 0, tmp = 1; for(size_t i = 0; i < sizeof(zsv_uc_vector); i++) { mask += (v[i] ? tmp : 0); @@ -285,16 +294,18 @@ static enum zsv_status zsv_scan_delim(struct zsv_scanner *scanner, scanner->partial_row_length = 0; - int quote = '"'; // ascii code 34 + int quote = scanner->opts.no_quotes > 0 ? -1 : '"'; // ascii code 34 zsv_uc_vector dl_v; memset(&dl_v, delimiter, sizeof(zsv_uc_vector)); // ascii 44 zsv_uc_vector nl_v; memset(&nl_v, '\n', sizeof(zsv_uc_vector)); // ascii code 10 zsv_uc_vector cr_v; memset(&cr_v, '\r', sizeof(zsv_uc_vector)); // ascii code 13 - zsv_uc_vector qt_v; + zsv_uc_vector qt_v; memset(&qt_v, scanner->opts.no_quotes > 0 ? 0 : '"', sizeof(qt_v)); + /* if(scanner->opts.no_quotes > 0) { quote = -1; memset(&qt_v, 0, sizeof(qt_v)); } else memset(&qt_v, '"', sizeof(zsv_uc_vector)); + */ // case "hel"|"o": check if we have an embedded dbl-quote past the initial opening quote, which was // split between the last buffer and this one e.g. "hel""o" where the last buffer ended @@ -320,7 +331,7 @@ static enum zsv_status zsv_scan_delim(struct zsv_scanner *scanner, scanner->buffer_end = bytes_read; for(; i < bytes_read; i++) { - if(mask == 0) { + if(UNLIKELY(mask == 0)) { mask_last_start = i; if(VERY_LIKELY(i < bytes_chunk_end)) { // keep going until we get a delim or we are at the eof @@ -340,7 +351,7 @@ static enum zsv_status zsv_scan_delim(struct zsv_scanner *scanner, size_t next_offset = NEXT_BIT(mask); i = mask_last_start + next_offset - 1; mask = clear_lowest_bit(mask); - if(skip_next_delim) { + if(VERY_UNLIKELY(skip_next_delim)) { skip_next_delim = 0; continue; }