Add a --cuda-arch option to src/configure.

Introduces read_value() to extract the text after '=' from a --name=value
option, uses it in the option parser, and lets --cuda-arch=FLAGS override the
CUDA_ARCH flags that configure_cuda otherwise derives from CUDA_VERSION.
Option arguments are quoted when passed to read_value so that values with
spaces (e.g. several -gencode flags) are kept intact.

diff --git a/src/configure b/src/configure
index d4509a04c05..4a03f7ade87 100755
--- a/src/configure
+++ b/src/configure
@@ -22,6 +22,8 @@
 #  ./configure --atlas-root=../tools/ATLAS/build
 #  ./configure --use-cuda=no   # disable CUDA detection (will build cpu-only
 #                              # version of kaldi even on CUDA-enabled machine
+#  ./configure --use-cuda --cudatk-dir=/usr/local/cuda/ --cuda-arch=-arch=sm_70
+#                              # Use cuda in /usr/local/cuda and set the arch to sm_70
 #  ./configure --static --fst-root=/opt/cross/armv8hf \
 #              --atlas-root=/opt/cross/armv8hf --host=armv8-rpi3-linux-gnueabihf
 #      # Cross compile for armv8hf, this assumes that you have openfst built
@@ -65,6 +67,7 @@ Configuration options:
   --shared                Build and link against shared libraries [default=no]
   --use-cuda              Build with CUDA [default=yes]
   --cudatk-dir=DIR        CUDA toolkit directory
+  --cuda-arch=FLAGS       Override the default CUDA_ARCH flags. See https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html#nvcc-examples.
   --double-precision      Build with BaseFloat set to double if yes [default=no],
                           mostly useful for testing purposes.
   --static-fst            Build with static OpenFst libraries [default=no]
@@ -114,8 +117,16 @@ function rel2abs {
   fi
 }
 
+# Prints the text following the first '=' in an option of the form
+# --name=value. Callers must quote the argument so that values containing
+# spaces (e.g. several -gencode flags) are passed through intact.
+function read_value {
+  local val=`expr "X$1" : '[^=]*=\(.*\)'`;
+  printf '%s\n' "$val"
+}
+
 function read_dirname {
-  local dir_name=`expr "X$1" : '[^=]*=\(.*\)'`;
+  local dir_name=`read_value "$1"`
   local retval=`rel2abs $dir_name`
   [ -z $retval ] && echo "Bad option '$1': no such directory" && exit 1;
   echo $retval
@@ -421,15 +432,17 @@ function configure_cuda {
       fi
     fi
 
-    case $CUDA_VERSION in
-      5_5) CUDA_ARCH="-gencode arch=compute_30,code=sm_30 -gencode arch=compute_35,code=sm_35" ;;
-      6_*) CUDA_ARCH="-gencode arch=compute_30,code=sm_30 -gencode arch=compute_35,code=sm_35 -gencode arch=compute_50,code=sm_50" ;;
-      7_*) CUDA_ARCH="-gencode arch=compute_30,code=sm_30 -gencode arch=compute_35,code=sm_35 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_53,code=sm_53" ;;
-      8_*) CUDA_ARCH="-gencode arch=compute_30,code=sm_30 -gencode arch=compute_35,code=sm_35 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_53,code=sm_53 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_62,code=sm_62" ;;
-      9_*) CUDA_ARCH="-gencode arch=compute_30,code=sm_30 -gencode arch=compute_35,code=sm_35 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_53,code=sm_53 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_62,code=sm_62 -gencode arch=compute_70,code=sm_70" ;;
-      10_*) CUDA_ARCH="-gencode arch=compute_30,code=sm_30 -gencode arch=compute_35,code=sm_35 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_53,code=sm_53 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_62,code=sm_62 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_72,code=sm_72 -gencode arch=compute_75,code=sm_75" ;;
-      *) echo "Unsupported CUDA_VERSION (CUDA_VERSION=$CUDA_VERSION), please report it to Kaldi mailing list, together with 'nvcc -h' or 'ptxas -h' which lists allowed -gencode values..."; exit 1 ;;
-    esac
+    if [ -z "$CUDA_ARCH" ]; then
+      case $CUDA_VERSION in
+        5_5) CUDA_ARCH="-gencode arch=compute_30,code=sm_30 -gencode arch=compute_35,code=sm_35" ;;
+        6_*) CUDA_ARCH="-gencode arch=compute_30,code=sm_30 -gencode arch=compute_35,code=sm_35 -gencode arch=compute_50,code=sm_50" ;;
+        7_*) CUDA_ARCH="-gencode arch=compute_30,code=sm_30 -gencode arch=compute_35,code=sm_35 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_53,code=sm_53" ;;
+        8_*) CUDA_ARCH="-gencode arch=compute_30,code=sm_30 -gencode arch=compute_35,code=sm_35 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_53,code=sm_53 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_62,code=sm_62" ;;
+        9_*) CUDA_ARCH="-gencode arch=compute_30,code=sm_30 -gencode arch=compute_35,code=sm_35 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_53,code=sm_53 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_62,code=sm_62 -gencode arch=compute_70,code=sm_70" ;;
+        10_*) CUDA_ARCH="-gencode arch=compute_30,code=sm_30 -gencode arch=compute_35,code=sm_35 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_53,code=sm_53 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_62,code=sm_62 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_72,code=sm_72 -gencode arch=compute_75,code=sm_75" ;;
+        *) echo "Unsupported CUDA_VERSION (CUDA_VERSION=$CUDA_VERSION), please report it to Kaldi mailing list, together with 'nvcc -h' or 'ptxas -h' which lists allowed -gencode values..."; exit 1 ;;
+      esac
+    fi
 
     echo "Using CUDA toolkit $CUDATKDIR (nvcc compiler and runtime libraries)"
     echo >> kaldi.mk
@@ -939,7 +952,7 @@ do
     mkl_threading=sequential;
     shift ;;
   --mkl-threading=*)
-    mkl_threading=`expr "X$1" : '[^=]*=\(.*\)'`;
+    mkl_threading=`read_value "$1"`;
     threaded_atlas=true;
     shift ;;
   --fst-root=*)
@@ -970,19 +983,22 @@ do
     OMPLIBDIR=`read_dirname $1`;
     shift ;;
   --mathlib=*)
-    MATHLIB=`expr "X$1" : '[^=]*=\(.*\)'`;
+    MATHLIB=`read_value "$1"`;
     shift ;;
   --cudatk-dir=*)
     CUDATKDIR=`read_dirname $1`;
     shift ;; #CUDA is used in src/cudamatrix and src/nnet{,bin} only
+  --cuda-arch=*)
+    CUDA_ARCH=`read_value "$1"`;
+    shift;;
   --fst-version=*)
-    OPENFST_VER=`expr "X$1" : '[^=]*=\(.*\)'`;
+    OPENFST_VER=`read_value "$1"`;
     shift;;
   --host=*)
     # The type of system where built programs and libraries will run.
     # It should be in the format cpu-vendor-os. If specified, this script
     # will infer the target architecture from the specified host triple.
-    HOST=`expr "X$1" : '[^=]*=\(.*\)'`;
+    HOST=`read_value "$1"`;
     shift ;;
   --android-incdir=*)
     android=true;