Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions easybuild/easyconfigs/f/FFTW/FFTW-3.3.6-gompi-2017b.eb
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Easyconfig: FFTW 3.3.6, built with the gompi 2017b toolchain.
# No `easyblock` is set in this file.
name = 'FFTW'
version = '3.3.6'

homepage = 'http://www.fftw.org'
description = """FFTW is a C subroutine library for computing the discrete Fourier transform (DFT)
in one or more dimensions, of arbitrary input size, and of both real and complex data."""

toolchain = {'name': 'gompi', 'version': '2017b'}
# build with the 'pic' toolchain option switched on
toolchainopts = {'pic': True}

# the download location is the homepage URL itself
source_urls = [homepage]
# the tarball name carries a '-pl2' suffix after the version;
# %(version)s is presumably a template placeholder filled in from `version` above
sources = ['fftw-%(version)s-pl2.tar.gz']
# one checksum for the one source file listed above
checksums = ['a5de35c5c824a78a058ca54278c706cdf3d4abba1c56b63531c2cb05f5d57da2']

# NOTE(review): presumably selects the 'check' target as the test step - confirm against the easyblock in use
runtest = 'check'

moduleclass = 'numlib'
33 changes: 33 additions & 0 deletions easybuild/easyconfigs/f/foss/foss-2017b.eb
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Easyconfig: foss 2017b toolchain definition (GCC, OpenMPI, OpenBLAS, FFTW, ScaLAPACK).
easyblock = 'Toolchain'

name = 'foss'
version = '2017b'

homepage = '(none)'
description = """GNU Compiler Collection (GCC) based compiler toolchain, including
OpenMPI for MPI support, OpenBLAS (BLAS and LAPACK support), FFTW and ScaLAPACK."""

# the toolchain itself is declared on top of 'dummy'; its components are listed as dependencies below
toolchain = {'name': 'dummy', 'version': 'dummy'}

gccver = '6.4.0-2.28'

blaslib = 'OpenBLAS'
blasver = '0.2.20'
# evaluates to 'OpenBLAS-0.2.20'; used below to build the third element of the ScaLAPACK entry
blas = '%s-%s' % (blaslib, blasver)

# toolchain used to build foss dependencies
comp_mpi_tc_name = 'gompi'
# evaluates to ('gompi', '2017b'), i.e. the gompi toolchain with the same version as this file
comp_mpi_tc = (comp_mpi_tc_name, version)

# we need GCC and OpenMPI as explicit dependencies instead of gompi toolchain
# because of toolchain preparation functions
# For binutils, stick to http://wiki.osdev.org/Cross-Compiler_Successful_Builds
# NOTE(review): no separate binutils entry is listed below; the '-2.28' suffix in gccver
# is presumably the version of the binutils bundled with the GCC module - confirm
#
# entries are (name, version) or (name, version, suffix, toolchain):
# GCC has no toolchain element, OpenMPI and the BLAS library are built with GCC,
# FFTW and ScaLAPACK are built with comp_mpi_tc
dependencies = [
('GCC', gccver),
('OpenMPI', '2.1.1', '', ('GCC', gccver)),
(blaslib, blasver, '', ('GCC', gccver)),
('FFTW', '3.3.6', '', comp_mpi_tc),
('ScaLAPACK', '2.0.2', '-%s' % blas, comp_mpi_tc),
]

moduleclass = 'toolchain'
20 changes: 20 additions & 0 deletions easybuild/easyconfigs/g/gompi/gompi-2017b.eb
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Easyconfig: gompi 2017b toolchain definition (GCC + OpenMPI).
easyblock = 'Toolchain'

name = 'gompi'
version = '2017b'

homepage = '(none)'
description = """GNU Compiler Collection (GCC) based compiler toolchain,
including OpenMPI for MPI support."""

# the toolchain itself is declared on top of 'dummy'; its components are listed as dependencies below
toolchain = {'name': 'dummy', 'version': 'dummy'}

gccver = '6.4.0-2.28'

# compiler toolchain dependencies
# entries are (name, version) or (name, version, suffix, toolchain); OpenMPI is built with GCC
dependencies = [
    ('GCC', gccver),  # includes both GCC and binutils
    ('OpenMPI', '2.1.1', '', ('GCC', gccver)),
]

moduleclass = 'toolchain'
19 changes: 19 additions & 0 deletions easybuild/easyconfigs/h/HPL/HPL-2.2-foss-2017b.eb
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Easyconfig: HPL 2.2, built with the foss 2017b toolchain.
# No `easyblock` is set in this file.
name = 'HPL'
version = '2.2'

homepage = 'http://www.netlib.org/benchmark/hpl/'
description = """HPL is a software package that solves a (random) dense linear system in double precision (64 bits)
arithmetic on distributed-memory computers. It can thus be regarded as a portable as well as freely available
implementation of the High Performance Computing Linpack Benchmark."""

toolchain = {'name': 'foss', 'version': '2017b'}
# build with the 'usempi' toolchain option switched on
toolchainopts = {'usempi': True}

# SOURCELOWER_TAR_GZ is not defined in this file; presumably a constant supplied by EasyBuild
# that names the '<lowercase name>-<version>.tar.gz' tarball - confirm
sources = [SOURCELOWER_TAR_GZ]
# %(namelower)s is presumably a template placeholder for the lower-case software name
source_urls = ['http://www.netlib.org/benchmark/%(namelower)s']
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add a checksum?

# NOTE(review): a single checksum is listed while this file has one source and one patch;
# it presumably covers the source tarball, leaving the patch below without a checksum - consider adding one
checksums = ['ac7534163a09e21a5fa763e4e16dfc119bc84043f6e6a807aba666518f8df440']

# fix Make dependencies, so parallel build also works
patches = ['HPL_parallel-make.patch']

moduleclass = 'tools'
51 changes: 51 additions & 0 deletions easybuild/easyconfigs/o/OpenBLAS/OpenBLAS-0.2.20-GCC-6.4.0-2.28.eb
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# Easyconfig: OpenBLAS 0.2.20, built with the GCC 6.4.0-2.28 toolchain.
easyblock = 'ConfigureMake'

name = 'OpenBLAS'
version = '0.2.20'

homepage = 'http://xianyi.github.com/OpenBLAS/'
description = "OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version."

toolchain = {'name': 'GCC', 'version': '6.4.0-2.28'}

# extra tarballs; both names are reused as entries of the `patches` list further down in this file
large_src = 'large.tgz'
timing_src = 'timing.tgz'

source_urls = [
# order matters, trying to download the large.tgz/timing.tgz LAPACK tarballs from GitHub causes trouble
'http://www.netlib.org/lapack/timing/',
'https://github.com/xianyi/OpenBLAS/archive/',
]
# only the OpenBLAS release tarball is listed as a source; %(version)s is presumably filled in from `version`
sources = ['v%(version)s.tar.gz']
# five checksums: the source tarball first, then the four `patches` entries in their list order
checksums = [
'5ef38b15d9c652985774869efd548b8e3e972e1e99475c673b25537ed7bcf394', # v0.2.20.tar.gz (OpenBLAS)
'f328d88b7fa97722f271d7d0cfea1c220e0f8e5ed5ff01d8ef1eb51d6f4243a1', # large.tgz
'999c65f8ea8bd4eac7f1c7f3463d4946917afd20a997807300fe35d70122f3af', # timing.tgz
# OpenBLAS-0.2.20_fix-Intel-L1-cache-size-detection.patch
'1d043e4838ec1f90b2b49318b780e3ab13b46133cb72a8d83eb0e3b1b056c4d6',
'1e6a046ab658c6e0b351de901d2812db28c2042f9f141416144c2faaf71fbb37', # OpenBLAS-0.2.20_revert-honor-cpuset.patch
]

patches = [
(large_src, '.'),
(timing_src, '.'),
'OpenBLAS-%(version)s_fix-Intel-L1-cache-size-detection.patch',
'OpenBLAS-%(version)s_revert-honor-cpuset.patch',
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I checked with one of the OpenBLAS co-maintainers whether going forward with OpenBLAS 0.2.20 with these two patches on top makes sense w.r.t. stability, and he confirmed that it does.

No need to keep stalling this and hope for a 'quick' OpenBLAS 0.2.21 that includes these fixes...

]

# the configure step is skipped even though the easyblock set at the top of this file is 'ConfigureMake'
skipsteps = ['configure']

# options for the build step: 64-bit binary, threading and OpenMP switched on,
# C compiler taken from $CC and Fortran compiler taken from $F77
buildopts = 'BINARY=64 USE_THREAD=1 USE_OPENMP=1 CC="$CC" FC="$F77"'
# options for the install step: the threading/OpenMP settings are repeated and the install prefix is set;
# %(installdir)s is presumably a template placeholder for the installation directory
installopts = "USE_THREAD=1 USE_OPENMP=1 PREFIX=%(installdir)s"

# extensive testing can be enabled by uncommenting the line below
# runtest = 'PATH=.:$PATH lapack-timing'

# paths checked after installation: headers plus the static and shared library, no directories;
# SHLIB_EXT is not defined in this file (presumably the platform's shared-library extension, supplied by EasyBuild)
sanity_check_paths = {
'files': ['include/cblas.h', 'include/f77blas.h', 'include/lapacke_config.h', 'include/lapacke.h',
'include/lapacke_mangling.h', 'include/lapacke_utils.h', 'include/openblas_config.h',
'lib/libopenblas.a', 'lib/libopenblas.%s' % SHLIB_EXT],
'dirs': [],
}

moduleclass = 'numlib'
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
fixes detection of L1 cache size on Intel processors, incl. Intel Haswell)
see https://github.com/xianyi/OpenBLAS/pull/1236 && https://github.com/xianyi/OpenBLAS/issues/1232
--- a/cpuid_x86.c
+++ b/cpuid_x86.c
@@ -71,12 +71,23 @@ void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx)
*edx = cpuInfo[3];
}

+void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx, int *edx)
+{
+ int cpuInfo[4] = {-1};
+ __cpuidex(cpuInfo, op, count);
+ *eax = cpuInfo[0];
+ *ebx = cpuInfo[1];
+ *ecx = cpuInfo[2];
+ *edx = cpuInfo[3];
+}
+
#else

#ifndef CPUIDEMU

#if defined(__APPLE__) && defined(__i386__)
void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx);
+void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx, int *edx);
#else
static C_INLINE void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){
#if defined(__i386__) && defined(__PIC__)
@@ -90,6 +101,19 @@ static C_INLINE void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){
("cpuid": "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx) : "a" (op) : "cc");
#endif
}
+
+static C_INLINE void cpuid_count(int op, int count ,int *eax, int *ebx, int *ecx, int *edx){
+#if defined(__i386__) && defined(__PIC__)
+ __asm__ __volatile__
+ ("mov %%ebx, %%edi;"
+ "cpuid;"
+ "xchgl %%ebx, %%edi;"
+ : "=a" (*eax), "=D" (*ebx), "=c" (*ecx), "=d" (*edx) : "0" (op), "2" (count) : "cc");
+#else
+ __asm__ __volatile__
+ ("cpuid": "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx) : "0" (op), "2" (count) : "cc");
+#endif
+}
#endif

#else
@@ -157,6 +157,10 @@ void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *
*edx = idlist[current].d;
}

+void cpuid_count (unsigned int op, unsigned int count, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) {
+ return cpuid (op, eax, ebx, ecx, edx);
+}
+
#endif

#endif // _MSC_VER
@@ -312,9 +336,9 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){
cpuid(0, &cpuid_level, &ebx, &ecx, &edx);

if (cpuid_level > 1) {
-
+ int numcalls =0 ;
cpuid(2, &eax, &ebx, &ecx, &edx);
-
+ numcalls = BITMASK(eax, 0, 0xff); //FIXME some systems may require repeated calls to read all entries
info[ 0] = BITMASK(eax, 8, 0xff);
info[ 1] = BITMASK(eax, 16, 0xff);
info[ 2] = BITMASK(eax, 24, 0xff);
@@ -335,7 +359,6 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){
info[14] = BITMASK(edx, 24, 0xff);

for (i = 0; i < 15; i++){
-
switch (info[i]){

/* This table is from http://www.sandpile.org/ia32/cpuid.htm */
@@ -637,12 +660,13 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){
LD1.linesize = 64;
break;
case 0x63 :
- DTB.size = 2048;
- DTB.associative = 4;
- DTB.linesize = 32;
- LDTB.size = 4096;
- LDTB.associative= 4;
- LDTB.linesize = 32;
+ DTB.size = 2048;
+ DTB.associative = 4;
+ DTB.linesize = 32;
+ LDTB.size = 4096;
+ LDTB.associative= 4;
+ LDTB.linesize = 32;
+ break;
case 0x66 :
LD1.size = 8;
LD1.associative = 4;
@@ -675,12 +699,13 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){
LC1.associative = 8;
break;
case 0x76 :
- ITB.size = 2048;
- ITB.associative = 0;
- ITB.linesize = 8;
- LITB.size = 4096;
- LITB.associative= 0;
- LITB.linesize = 8;
+ ITB.size = 2048;
+ ITB.associative = 0;
+ ITB.linesize = 8;
+ LITB.size = 4096;
+ LITB.associative= 0;
+ LITB.linesize = 8;
+ break;
case 0x77 :
LC1.size = 16;
LC1.associative = 4;
@@ -891,6 +916,68 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){
}

if (get_vendor() == VENDOR_INTEL) {
+ if(LD1.size<=0 || LC1.size<=0){
+ //If we didn't detect L1 correctly before,
+ int count;
+ for (count=0;count <4;count++) {
+ cpuid_count(4, count, &eax, &ebx, &ecx, &edx);
+ switch (eax &0x1f) {
+ case 0:
+ continue;
+ case 1:
+ case 3:
+ {
+ switch ((eax >>5) &0x07)
+ {
+ case 1:
+ {
+// fprintf(stderr,"L1 data cache...\n");
+ int sets = ecx+1;
+ int lines = (ebx & 0x0fff) +1;
+ ebx>>=12;
+ int part = (ebx&0x03ff)+1;
+ ebx >>=10;
+ int assoc = (ebx&0x03ff)+1;
+ LD1.size = (assoc*part*lines*sets)/1024;
+ LD1.associative = assoc;
+ LD1.linesize= lines;
+ break;
+ }
+ default:
+ break;
+ }
+ break;
+ }
+ case 2:
+ {
+ switch ((eax >>5) &0x07)
+ {
+ case 1:
+ {
+// fprintf(stderr,"L1 instruction cache...\n");
+ int sets = ecx+1;
+ int lines = (ebx & 0x0fff) +1;
+ ebx>>=12;
+ int part = (ebx&0x03ff)+1;
+ ebx >>=10;
+ int assoc = (ebx&0x03ff)+1;
+ LC1.size = (assoc*part*lines*sets)/1024;
+ LC1.associative = assoc;
+ LC1.linesize= lines;
+ break;
+ }
+ default:
+ break;
+ }
+ break;
+
+ }
+ default:
+ break;
+ }
+ }
+ }
+
cpuid(0x80000000, &cpuid_level, &ebx, &ecx, &edx);
if (cpuid_level >= 0x80000006) {
if(L2.size<=0){
Loading