diff --git a/easybuild/easyconfigs/f/FFTW/FFTW-3.3.6-gompi-2017b.eb b/easybuild/easyconfigs/f/FFTW/FFTW-3.3.6-gompi-2017b.eb new file mode 100644 index 000000000000..a21d63724dbd --- /dev/null +++ b/easybuild/easyconfigs/f/FFTW/FFTW-3.3.6-gompi-2017b.eb @@ -0,0 +1,17 @@ +name = 'FFTW' +version = '3.3.6' + +homepage = 'http://www.fftw.org' +description = """FFTW is a C subroutine library for computing the discrete Fourier transform (DFT) + in one or more dimensions, of arbitrary input size, and of both real and complex data.""" + +toolchain = {'name': 'gompi', 'version': '2017b'} +toolchainopts = {'pic': True} + +source_urls = [homepage] +sources = ['fftw-%(version)s-pl2.tar.gz'] +checksums = ['a5de35c5c824a78a058ca54278c706cdf3d4abba1c56b63531c2cb05f5d57da2'] + +runtest = 'check' + +moduleclass = 'numlib' diff --git a/easybuild/easyconfigs/f/foss/foss-2017b.eb b/easybuild/easyconfigs/f/foss/foss-2017b.eb new file mode 100644 index 000000000000..d217118e8689 --- /dev/null +++ b/easybuild/easyconfigs/f/foss/foss-2017b.eb @@ -0,0 +1,33 @@ +easyblock = 'Toolchain' + +name = 'foss' +version = '2017b' + +homepage = '(none)' +description = """GNU Compiler Collection (GCC) based compiler toolchain, including + OpenMPI for MPI support, OpenBLAS (BLAS and LAPACK support), FFTW and ScaLAPACK.""" + +toolchain = {'name': 'dummy', 'version': 'dummy'} + +gccver = '6.4.0-2.28' + +blaslib = 'OpenBLAS' +blasver = '0.2.20' +blas = '%s-%s' % (blaslib, blasver) + +# toolchain used to build foss dependencies +comp_mpi_tc_name = 'gompi' +comp_mpi_tc = (comp_mpi_tc_name, version) + +# we need GCC and OpenMPI as explicit dependencies instead of gompi toolchain +# because of toolchain preparation functions +# For binutils, stick to http://wiki.osdev.org/Cross-Compiler_Successful_Builds +dependencies = [ + ('GCC', gccver), + ('OpenMPI', '2.1.1', '', ('GCC', gccver)), + (blaslib, blasver, '', ('GCC', gccver)), + ('FFTW', '3.3.6', '', comp_mpi_tc), + ('ScaLAPACK', '2.0.2', '-%s' % blas, 
comp_mpi_tc), +] + +moduleclass = 'toolchain' diff --git a/easybuild/easyconfigs/g/gompi/gompi-2017b.eb b/easybuild/easyconfigs/g/gompi/gompi-2017b.eb new file mode 100644 index 000000000000..acba6f51d98b --- /dev/null +++ b/easybuild/easyconfigs/g/gompi/gompi-2017b.eb @@ -0,0 +1,20 @@ +easyblock = "Toolchain" + +name = 'gompi' +version = '2017b' + +homepage = '(none)' +description = """GNU Compiler Collection (GCC) based compiler toolchain, + including OpenMPI for MPI support.""" + +toolchain = {'name': 'dummy', 'version': 'dummy'} + +gccver = '6.4.0-2.28' + +# compiler toolchain dependencies +dependencies = [ + ('GCC', gccver), # includes both GCC and binutils + ('OpenMPI', '2.1.1', '', ('GCC', gccver)), +] + +moduleclass = 'toolchain' diff --git a/easybuild/easyconfigs/h/HPL/HPL-2.2-foss-2017b.eb b/easybuild/easyconfigs/h/HPL/HPL-2.2-foss-2017b.eb new file mode 100644 index 000000000000..45cfc6864783 --- /dev/null +++ b/easybuild/easyconfigs/h/HPL/HPL-2.2-foss-2017b.eb @@ -0,0 +1,19 @@ +name = 'HPL' +version = '2.2' + +homepage = 'http://www.netlib.org/benchmark/hpl/' +description = """HPL is a software package that solves a (random) dense linear system in double precision (64 bits) + arithmetic on distributed-memory computers. 
It can thus be regarded as a portable as well as freely available + implementation of the High Performance Computing Linpack Benchmark.""" + +toolchain = {'name': 'foss', 'version': '2017b'} +toolchainopts = {'usempi': True} + +sources = [SOURCELOWER_TAR_GZ] +source_urls = ['http://www.netlib.org/benchmark/%(namelower)s'] +checksums = ['ac7534163a09e21a5fa763e4e16dfc119bc84043f6e6a807aba666518f8df440'] + +# fix Make dependencies, so parallel build also works +patches = ['HPL_parallel-make.patch'] + +moduleclass = 'tools' diff --git a/easybuild/easyconfigs/o/OpenBLAS/OpenBLAS-0.2.20-GCC-6.4.0-2.28.eb b/easybuild/easyconfigs/o/OpenBLAS/OpenBLAS-0.2.20-GCC-6.4.0-2.28.eb new file mode 100644 index 000000000000..5b7ec851fcd9 --- /dev/null +++ b/easybuild/easyconfigs/o/OpenBLAS/OpenBLAS-0.2.20-GCC-6.4.0-2.28.eb @@ -0,0 +1,51 @@ +easyblock = 'ConfigureMake' + +name = 'OpenBLAS' +version = '0.2.20' + +homepage = 'http://xianyi.github.com/OpenBLAS/' +description = "OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version." 
+ +toolchain = {'name': 'GCC', 'version': '6.4.0-2.28'} + +large_src = 'large.tgz' +timing_src = 'timing.tgz' + +source_urls = [ + # order matters, trying to download the large.tgz/timing.tgz LAPACK tarballs from GitHub causes trouble + 'http://www.netlib.org/lapack/timing/', + 'https://github.com/xianyi/OpenBLAS/archive/', +] +sources = ['v%(version)s.tar.gz'] +checksums = [ + '5ef38b15d9c652985774869efd548b8e3e972e1e99475c673b25537ed7bcf394', # v0.2.20.tar.gz (OpenBLAS) + 'f328d88b7fa97722f271d7d0cfea1c220e0f8e5ed5ff01d8ef1eb51d6f4243a1', # large.tgz + '999c65f8ea8bd4eac7f1c7f3463d4946917afd20a997807300fe35d70122f3af', # timing.tgz + # OpenBLAS-0.2.20_fix-Intel-L1-cache-size-detection.patch + '1d043e4838ec1f90b2b49318b780e3ab13b46133cb72a8d83eb0e3b1b056c4d6', + '1e6a046ab658c6e0b351de901d2812db28c2042f9f141416144c2faaf71fbb37', # OpenBLAS-0.2.20_revert-honor-cpuset.patch +] + +patches = [ + (large_src, '.'), + (timing_src, '.'), + 'OpenBLAS-%(version)s_fix-Intel-L1-cache-size-detection.patch', + 'OpenBLAS-%(version)s_revert-honor-cpuset.patch', +] + +skipsteps = ['configure'] + +buildopts = 'BINARY=64 USE_THREAD=1 USE_OPENMP=1 CC="$CC" FC="$F77"' +installopts = "USE_THREAD=1 USE_OPENMP=1 PREFIX=%(installdir)s" + +# extensive testing can be enabled by uncommenting the line below +# runtest = 'PATH=.:$PATH lapack-timing' + +sanity_check_paths = { + 'files': ['include/cblas.h', 'include/f77blas.h', 'include/lapacke_config.h', 'include/lapacke.h', + 'include/lapacke_mangling.h', 'include/lapacke_utils.h', 'include/openblas_config.h', + 'lib/libopenblas.a', 'lib/libopenblas.%s' % SHLIB_EXT], + 'dirs': [], +} + +moduleclass = 'numlib' diff --git a/easybuild/easyconfigs/o/OpenBLAS/OpenBLAS-0.2.20_fix-Intel-L1-cache-size-detection.patch b/easybuild/easyconfigs/o/OpenBLAS/OpenBLAS-0.2.20_fix-Intel-L1-cache-size-detection.patch new file mode 100644 index 000000000000..f974460dc6fc --- /dev/null +++ 
b/easybuild/easyconfigs/o/OpenBLAS/OpenBLAS-0.2.20_fix-Intel-L1-cache-size-detection.patch @@ -0,0 +1,188 @@ +fixes detection of L1 cache size on Intel processors (incl. Intel Haswell) +see https://github.com/xianyi/OpenBLAS/pull/1236 && https://github.com/xianyi/OpenBLAS/issues/1232 +--- a/cpuid_x86.c ++++ b/cpuid_x86.c +@@ -71,12 +71,23 @@ void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx) + *edx = cpuInfo[3]; + } + ++void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx, int *edx) ++{ ++ int cpuInfo[4] = {-1}; ++ __cpuidex(cpuInfo, op, count); ++ *eax = cpuInfo[0]; ++ *ebx = cpuInfo[1]; ++ *ecx = cpuInfo[2]; ++ *edx = cpuInfo[3]; ++} ++ + #else + + #ifndef CPUIDEMU + + #if defined(__APPLE__) && defined(__i386__) + void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx); ++void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx, int *edx); + #else + static C_INLINE void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){ + #if defined(__i386__) && defined(__PIC__) +@@ -90,6 +101,19 @@ static C_INLINE void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){ + ("cpuid": "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx) : "a" (op) : "cc"); + #endif + } ++ ++static C_INLINE void cpuid_count(int op, int count ,int *eax, int *ebx, int *ecx, int *edx){ ++#if defined(__i386__) && defined(__PIC__) ++ __asm__ __volatile__ ++ ("mov %%ebx, %%edi;" ++ "cpuid;" ++ "xchgl %%ebx, %%edi;" ++ : "=a" (*eax), "=D" (*ebx), "=c" (*ecx), "=d" (*edx) : "0" (op), "2" (count) : "cc"); ++#else ++ __asm__ __volatile__ ++ ("cpuid": "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx) : "0" (op), "2" (count) : "cc"); ++#endif ++} + #endif + + #else +@@ -157,6 +157,10 @@ void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int * + *edx = idlist[current].d; + } + ++void cpuid_count (unsigned int op, unsigned int count, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) { ++ return cpuid (op, eax, ebx, ecx, edx); 
++} ++ + #endif + + #endif // _MSC_VER +@@ -312,9 +336,9 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){ + cpuid(0, &cpuid_level, &ebx, &ecx, &edx); + + if (cpuid_level > 1) { +- ++ int numcalls =0 ; + cpuid(2, &eax, &ebx, &ecx, &edx); +- ++ numcalls = BITMASK(eax, 0, 0xff); //FIXME some systems may require repeated calls to read all entries + info[ 0] = BITMASK(eax, 8, 0xff); + info[ 1] = BITMASK(eax, 16, 0xff); + info[ 2] = BITMASK(eax, 24, 0xff); +@@ -335,7 +359,6 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){ + info[14] = BITMASK(edx, 24, 0xff); + + for (i = 0; i < 15; i++){ +- + switch (info[i]){ + + /* This table is from http://www.sandpile.org/ia32/cpuid.htm */ +@@ -637,12 +660,13 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){ + LD1.linesize = 64; + break; + case 0x63 : +- DTB.size = 2048; +- DTB.associative = 4; +- DTB.linesize = 32; +- LDTB.size = 4096; +- LDTB.associative= 4; +- LDTB.linesize = 32; ++ DTB.size = 2048; ++ DTB.associative = 4; ++ DTB.linesize = 32; ++ LDTB.size = 4096; ++ LDTB.associative= 4; ++ LDTB.linesize = 32; ++ break; + case 0x66 : + LD1.size = 8; + LD1.associative = 4; +@@ -675,12 +699,13 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){ + LC1.associative = 8; + break; + case 0x76 : +- ITB.size = 2048; +- ITB.associative = 0; +- ITB.linesize = 8; +- LITB.size = 4096; +- LITB.associative= 0; +- LITB.linesize = 8; ++ ITB.size = 2048; ++ ITB.associative = 0; ++ ITB.linesize = 8; ++ LITB.size = 4096; ++ LITB.associative= 0; ++ LITB.linesize = 8; ++ break; + case 0x77 : + LC1.size = 16; + LC1.associative = 4; +@@ -891,6 +916,68 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){ + } + + if (get_vendor() == VENDOR_INTEL) { ++ if(LD1.size<=0 || LC1.size<=0){ ++ //If we didn't detect L1 correctly before, ++ int count; ++ for (count=0;count <4;count++) { ++ cpuid_count(4, count, &eax, &ebx, &ecx, &edx); ++ switch (eax &0x1f) { ++ case 0: ++ continue; ++ case 1: ++ case 3: ++ { ++ switch ((eax >>5) 
&0x07) ++ { ++ case 1: ++ { ++// fprintf(stderr,"L1 data cache...\n"); ++ int sets = ecx+1; ++ int lines = (ebx & 0x0fff) +1; ++ ebx>>=12; ++ int part = (ebx&0x03ff)+1; ++ ebx >>=10; ++ int assoc = (ebx&0x03ff)+1; ++ LD1.size = (assoc*part*lines*sets)/1024; ++ LD1.associative = assoc; ++ LD1.linesize= lines; ++ break; ++ } ++ default: ++ break; ++ } ++ break; ++ } ++ case 2: ++ { ++ switch ((eax >>5) &0x07) ++ { ++ case 1: ++ { ++// fprintf(stderr,"L1 instruction cache...\n"); ++ int sets = ecx+1; ++ int lines = (ebx & 0x0fff) +1; ++ ebx>>=12; ++ int part = (ebx&0x03ff)+1; ++ ebx >>=10; ++ int assoc = (ebx&0x03ff)+1; ++ LC1.size = (assoc*part*lines*sets)/1024; ++ LC1.associative = assoc; ++ LC1.linesize= lines; ++ break; ++ } ++ default: ++ break; ++ } ++ break; ++ ++ } ++ default: ++ break; ++ } ++ } ++ } ++ + cpuid(0x80000000, &cpuid_level, &ebx, &ecx, &edx); + if (cpuid_level >= 0x80000006) { + if(L2.size<=0){ diff --git a/easybuild/easyconfigs/o/OpenBLAS/OpenBLAS-0.2.20_revert-honor-cpuset.patch b/easybuild/easyconfigs/o/OpenBLAS/OpenBLAS-0.2.20_revert-honor-cpuset.patch new file mode 100644 index 000000000000..240b757a9713 --- /dev/null +++ b/easybuild/easyconfigs/o/OpenBLAS/OpenBLAS-0.2.20_revert-honor-cpuset.patch @@ -0,0 +1,135 @@ +revert changes to honor cgroup/cpuset limits that were merged prematurely in OpenBLAS 0.2.20 +see https://github.com/xianyi/OpenBLAS/pull/1247 +diff --git a/driver/others/init.c b/driver/others/init.c +index 4c75d72e4..3e6176967 100644 +--- a/driver/others/init.c ++++ b/driver/others/init.c +@@ -778,11 +778,11 @@ static int initialized = 0; + void gotoblas_affinity_init(void) { + + int cpu, num_avail; +-#ifndef USE_OPENMP ++#ifndef USE_OPENMP + cpu_set_t cpu_mask; + #endif + int i; +- ++ + if (initialized) return; + + initialized = 1; +@@ -826,54 +826,15 @@ void gotoblas_affinity_init(void) { + common -> shmid = pshmid; + + if (common -> magic != SH_MAGIC) { +- cpu_set_t *cpusetp; +- int nums; +- int ret; +- + #ifdef DEBUG + 
fprintf(stderr, "Shared Memory Initialization.\n"); + #endif + + //returns the number of processors which are currently online +- +- nums = sysconf(_SC_NPROCESSORS_CONF); +- +-#if !defined(__GLIBC_PREREQ) || !__GLIBC_PREREQ(2, 3) +- common->num_procs = nums; +-#elif __GLIBC_PREREQ(2, 7) +- cpusetp = CPU_ALLOC(nums); +- if (cpusetp == NULL) { +- common->num_procs = nums; +- } else { +- size_t size; +- size = CPU_ALLOC_SIZE(nums); +- ret = sched_getaffinity(0,size,cpusetp); +- if (ret!=0) +- common->num_procs = nums; +- else +- common->num_procs = CPU_COUNT_S(size,cpusetp); +- } +- CPU_FREE(cpusetp); +-#else +- ret = sched_getaffinity(0,sizeof(cpu_set_t), cpusetp); +- if (ret!=0) { +- common->num_procs = nums; +- } else { +-#if !__GLIBC_PREREQ(2, 6) +- int i; +- int n = 0; +- for (i=0;inum_procs = n; +- } +-#else +- common->num_procs = CPU_COUNT(sizeof(cpu_set_t),cpusetp); +-#endif +- +-#endif ++ common -> num_procs = sysconf(_SC_NPROCESSORS_CONF);; + + if(common -> num_procs > MAX_CPUS) { +- fprintf(stderr, "\nOpenBLAS Warning : The number of CPU/Cores(%d) is beyond the limit(%d). Terminated.\n", common->num_procs, MAX_CPUS); ++ fprintf(stderr, "\nOpenBLAS Warining : The number of CPU/Cores(%d) is beyond the limit(%d). Terminated.\n", common->num_procs, MAX_CPUS); + exit(1); + } + +@@ -886,7 +847,7 @@ void gotoblas_affinity_init(void) { + if (common -> num_nodes > 1) numa_mapping(); + + common -> final_num_procs = 0; +- for(i = 0; i < common -> avail_count; i++) common -> final_num_procs += rcount(common -> avail[i]) + 1; //Make the max cpu number. ++ for(i = 0; i < common -> avail_count; i++) common -> final_num_procs += rcount(common -> avail[i]) + 1; //Make the max cpu number. 
+ + for (cpu = 0; cpu < common -> final_num_procs; cpu ++) common -> cpu_use[cpu] = 0; + +diff --git a/driver/others/memory.c b/driver/others/memory.c +index 38d063715..916950315 100644 +--- a/driver/others/memory.c ++++ b/driver/others/memory.c +@@ -175,44 +175,7 @@ int get_num_procs(void); + #else + int get_num_procs(void) { + static int nums = 0; +-cpu_set_t *cpusetp; +-size_t size; +-int ret; +-int i,n; +- + if (!nums) nums = sysconf(_SC_NPROCESSORS_CONF); +-#if !defined(OS_LINUX) +- return nums; +-#endif +- +-#if !defined(__GLIBC_PREREQ) +- return nums; +-#endif +-#if !__GLIBC_PREREQ(2, 3) +- return nums; +-#endif +- +-#if !__GLIBC_PREREQ(2, 7) +- ret = sched_getaffinity(0,sizeof(cpu_set_t), cpusetp); +- if (ret!=0) return nums; +- n=0; +-#if !__GLIBC_PREREQ(2, 6) +- for (i=0;i