Skip to content

Commit 4cb909b

Browse files
shibatch (Naoki Shibata) and Naoki Shibata authored
Further cleanup (#638)
Co-authored-by: Naoki Shibata <[email protected]>
1 parent a0f91a6 commit 4cb909b

14 files changed

+175
-95
lines changed

CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -334,6 +334,7 @@ if(SLEEF_SHOW_CONFIG)
334334
message(" Detected C compiler: ${CMAKE_C_COMPILER_ID} @ ${CMAKE_C_COMPILER}")
335335
message(" CMake: ${CMAKE_VERSION}")
336336
message(" Make program: ${CMAKE_MAKE_PROGRAM}")
337+
message(" CMake build type: ${CMAKE_BUILD_TYPE}")
337338
if(CMAKE_CROSSCOMPILING)
338339
message(" Crosscompiling SLEEF.")
339340
message(" Native build dir: ${NATIVE_BUILD_DIR}")

Configure.cmake

+1-1
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ endif()
9999

100100
if (SLEEF_ENABLE_TLFLOAT)
101101
set(TLFLOAT_MINIMUM_VERSION 1.11.2)
102-
set(TLFLOAT_GIT_TAG "5b03b9fd41aaf4d655361f971fe45e738646f286")
102+
set(TLFLOAT_GIT_TAG "356cb5ee4218fd93fbb73c76869983408b97d90a")
103103

104104
set(TLFLOAT_SOURCE_DIR "${PROJECT_SOURCE_DIR}/submodules/tlfloat")
105105
set(TLFLOAT_INSTALL_DIR "${SLEEF_SUBMODULE_INSTALL_DIR}/tlfloat")

Jenkinsfile

+41-1
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,29 @@ pipeline {
44
stages {
55
stage('Preamble') {
66
parallel {
7+
// CI stage: builds, tests and installs with clang-18 on an agent labelled
// 'x86_64 && ubuntu24 && avx512f'.
// The shell script configures with the Ninja generator, enabling DFT, QUAD,
// inline headers, tester4 and SLEEF_ASAN, and enforcing SSE2/SSE4/AVX/AVX2/AVX512F.
// It then runs the build, ctest (one job per `nproc`) and `ninja install`.
stage('x86_64 linux clang-18') {
8+
agent { label 'x86_64 && ubuntu24 && avx512f' }
9+
// The default checkout is skipped; the steps below clean the workspace and
// then run `checkout scm` explicitly.
options { skipDefaultCheckout() }
10+
steps {
11+
cleanWs()
12+
checkout scm
13+
// CC/CXX select clang-18; CUDACXX points at the CUDA 12.6 nvcc path.
// No CMAKE_BUILD_TYPE is passed on the cmake command line in this stage.
sh '''
14+
echo "x86_64 clang-18 on" `hostname`
15+
export CC=clang-18
16+
export CXX=clang++-18
17+
export CUDACXX=/opt/cuda-12.6/bin/nvcc
18+
mkdir build
19+
cd build
20+
cmake .. -GNinja -DCMAKE_INSTALL_PREFIX=../../install -DSLEEF_SHOW_CONFIG=1 -DSLEEF_BUILD_DFT=TRUE -DSLEEF_BUILD_QUAD=TRUE -DSLEEF_BUILD_INLINE_HEADERS=TRUE -DSLEEF_ENFORCE_SSE2=TRUE -DSLEEF_ENFORCE_SSE4=TRUE -DSLEEF_ENFORCE_AVX=TRUE -DSLEEF_ENFORCE_AVX2=TRUE -DSLEEF_ENFORCE_AVX512F=TRUE -DSLEEF_ENABLE_TESTER4=True -DSLEEF_ASAN=True
21+
cmake -E time ninja
22+
export OMP_WAIT_POLICY=passive
23+
export CTEST_OUTPUT_ON_FAILURE=TRUE
24+
ctest -j `nproc`
25+
ninja install
26+
'''
27+
}
28+
}
29+
730
stage('x86_64 linux gcc-13') {
831
agent { label 'x86_64 && ubuntu24 && cuda' }
932
options { skipDefaultCheckout() }
@@ -27,6 +50,23 @@ pipeline {
2750
}
2851
}
2952

53+
// CI stage: Release build and test through winbuild-clang.bat on an agent
// labelled 'windows11 && vs2022'.
// This stage passes SLEEF_BUILD_DFT=False, SLEEF_ENABLE_TESTER4=False and
// SLEEF_DISABLE_SSL=False, while still enforcing SSE2/SSE4/AVX/AVX2/AVX512F.
stage('x86_64 windows clang') {
54+
agent { label 'windows11 && vs2022' }
55+
// The default checkout is skipped; the steps below clean the workspace and
// then run `checkout scm` explicitly.
options { skipDefaultCheckout() }
56+
steps {
57+
cleanWs()
58+
checkout scm
// The batch script calls vcvars64.bat from the VS 2022 Community install.
// After each `call` it exits with the current ERRORLEVEL if that is non-zero;
// the final `exit /b %ERRORLEVEL%` makes the ctest result the step's exit code.
bat """
60+
call "C:\\Program Files\\Microsoft Visual Studio\\2022\\Community\\VC\\Auxiliary\\Build\\vcvars64.bat"
61+
if not %ERRORLEVEL% == 0 exit /b %ERRORLEVEL%
62+
call "winbuild-clang.bat" -DCMAKE_BUILD_TYPE=Release -DSLEEF_SHOW_CONFIG=1 -DSLEEF_BUILD_DFT=False -DSLEEF_BUILD_QUAD=TRUE -DSLEEF_ENFORCE_SSE2=TRUE -DSLEEF_ENFORCE_SSE4=TRUE -DSLEEF_ENFORCE_AVX=TRUE -DSLEEF_ENFORCE_AVX2=TRUE -DSLEEF_ENFORCE_AVX512F=TRUE -DSLEEF_ENABLE_TESTER4=False -DSLEEF_DISABLE_SSL=False
63+
if not %ERRORLEVEL% == 0 exit /b %ERRORLEVEL%
64+
ctest -j 4 --output-on-failure
65+
exit /b %ERRORLEVEL%
66+
"""
67+
}
68+
}
69+
3070
stage('x86_64 windows vs2022') {
3171
agent { label 'windows11 && vs2022' }
3272
options { skipDefaultCheckout() }
@@ -36,7 +76,7 @@ pipeline {
3676
bat """
3777
call "C:\\Program Files\\Microsoft Visual Studio\\2022\\Community\\VC\\Auxiliary\\Build\\vcvars64.bat"
3878
if not %ERRORLEVEL% == 0 exit /b %ERRORLEVEL%
39-
call "winbuild-msvc.bat" -DCMAKE_BUILD_TYPE=Release -DSLEEF_SHOW_CONFIG=1 -DSLEEF_BUILD_DFT=TRUE -DSLEEF_BUILD_QUAD=TRUE -DSLEEF_ENFORCE_SSE2=TRUE -DSLEEF_ENFORCE_SSE4=TRUE -DSLEEF_ENFORCE_AVX=TRUE -DSLEEF_ENFORCE_AVX2=TRUE -DSLEEF_ENFORCE_AVX512F=TRUE -DSLEEF_ENABLE_TESTER4=True -DSLEEF_DISABLE_SSL=True
79+
call "winbuild-msvc.bat" -DCMAKE_BUILD_TYPE=Release -DSLEEF_SHOW_CONFIG=1 -DSLEEF_BUILD_DFT=True -DSLEEF_BUILD_QUAD=TRUE -DSLEEF_ENFORCE_SSE2=TRUE -DSLEEF_ENFORCE_SSE4=TRUE -DSLEEF_ENFORCE_AVX=TRUE -DSLEEF_ENFORCE_AVX2=TRUE -DSLEEF_ENFORCE_AVX512F=TRUE -DSLEEF_ENABLE_TESTER4=True -DSLEEF_DISABLE_SSL=True
4080
if not %ERRORLEVEL% == 0 exit /b %ERRORLEVEL%
4181
ctest -j 4 --output-on-failure
4282
exit /b %ERRORLEVEL%

src/arch/helperrvv.h

+2-2
Original file line numberDiff line numberDiff line change
@@ -1171,7 +1171,7 @@ static INLINE void vscatter2_v_p_i_i_vd(double *ptr, int offset, int step, vdoub
11711171
// probably only iterate 2 or 4 times.
11721172
//
11731173
ptr += offset * 2;
1174-
for (int i = 0; i < VECTLENDP; i += 2) {
1174+
for (int i = 0; i < (int)VECTLENDP; i += 2) {
11751175
// PROTIP: Avoid modifying `v` within the loop, and just extract the useful
11761176
// part directly in each iteration, because we can. This avoids a
11771177
// loop-carried dependency.
@@ -1185,7 +1185,7 @@ static INLINE void vscatter2_v_p_i_i_vd(double *ptr, int offset, int step, vdoub
11851185
static INLINE void vscatter2_v_p_i_i_vf(float *ptr, int offset, int step, vfloat v) {
11861186
// as above re: looping
11871187
ptr += offset * 2;
1188-
for (int i = 0; i < VECTLENSP; i += 2) {
1188+
for (int i = 0; i < (int)VECTLENSP; i += 2) {
11891189
vfloat vv = __riscv_vslidedown(v, i, 2);
11901190
__riscv_vse32(ptr, vv, 2);
11911191
ptr += step * 2;

src/common/addSuffix.c

+2
Original file line numberDiff line numberDiff line change
@@ -239,6 +239,8 @@ int main(int argc, char **argv) {
239239

240240
fclose(fp);
241241

242+
for(int i=0;i<nkeywords;i++) free(keywords[i]);
243+
242244
free(keywords);
243245

244246
exit(0);

src/common/testerutil.c

+13-4
Original file line numberDiff line numberDiff line change
@@ -99,11 +99,20 @@ void xsrand(uint64_t s) {
9999

100100
// Fill memory with random bits
101101
// memrand: writes `size` bytes of xrand() output starting at `p`.
//   p    - destination buffer
//   size - number of bytes to fill
// This hunk shows both versions: gutter lines ending in '-' precede removed
// lines, gutter lines ending in '+' precede added lines.
void memrand(void *p, int size) {
102-
uint64_t *q = (uint64_t *)p;
102+
uint8_t *q = (uint8_t *)p;
103103
int i;
// Removed version: the first loop runs while i < size and stores 8 bytes per
// iteration, so when size is not a multiple of 8 the last store writes past
// p + size. It leaves i >= size, so the byte-wise tail loop never executes.
104-
for(i=0;i<size;i+=8) *q++ = xrand();
105-
uint8_t *r = (uint8_t *)q;
106-
for(;i<size;i++) *r++ = xrand() & 0xff;
// Added version: `size & ~7` rounds size down to a multiple of 8, so this
// loop only covers whole 8-byte groups. Each xrand() value is written one
// byte at a time, least-significant byte first, through a uint8_t pointer.
104+
for(i=0;i<(size & ~7);i+=8) {
105+
uint64_t u = xrand();
106+
*q++ = (uint8_t)(u & 0xff); u >>= 8;
107+
*q++ = (uint8_t)(u & 0xff); u >>= 8;
108+
*q++ = (uint8_t)(u & 0xff); u >>= 8;
109+
*q++ = (uint8_t)(u & 0xff); u >>= 8;
110+
*q++ = (uint8_t)(u & 0xff); u >>= 8;
111+
*q++ = (uint8_t)(u & 0xff); u >>= 8;
112+
*q++ = (uint8_t)(u & 0xff); u >>= 8;
113+
*q++ = (uint8_t)(u & 0xff); u >>= 8;
114+
}
// Tail: the remaining size % 8 bytes each take the low byte of a fresh xrand().
115+
for(;i<size;i++) *q++ = xrand() & 0xff;
107116
}
108117

109118
//

src/common/testerutil.h

+53
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,22 @@
33
// (See accompanying file LICENSE.txt or copy at
44
// http://www.boost.org/LICENSE_1_0.txt)
55

6+
#ifdef __cplusplus
7+
#include <tlfloat/tlfloat.h>
8+
using namespace tlfloat;
9+
#endif
10+
11+
#if defined(__GNUC__) && !defined(__clang__)
12+
#pragma GCC diagnostic ignored "-Wuninitialized"
13+
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
14+
#endif
15+
16+
#if defined(__clang__)
17+
#pragma clang diagnostic ignored "-Wvla-cxx-extension"
18+
#pragma clang diagnostic ignored "-Wuninitialized"
19+
#pragma clang diagnostic ignored "-Wtautological-compare"
20+
#endif
21+
622
#define DENORMAL_DBL_MIN (4.9406564584124654418e-324)
723
#define POSITIVE_INFINITY INFINITY
824
#define NEGATIVE_INFINITY (-INFINITY)
@@ -15,6 +31,10 @@
1531
# define M_PIf ((float)M_PI)
1632
#endif
1733

34+
#ifdef __cplusplus
35+
extern "C" {
36+
#endif
37+
1838
extern int enableFlushToZero;
1939
double flushToZero(double y);
2040

@@ -98,3 +118,36 @@ void mpfr_cospi(mpfr_t ret, mpfr_t arg, mpfr_rnd_t rnd);
98118
#endif
99119
void mpfr_lgamma_nosign(mpfr_t ret, mpfr_t arg, mpfr_rnd_t rnd);
100120
#endif
121+
122+
#ifdef __cplusplus
123+
}
124+
125+
// countULP: returns the difference between `ot` and `oc`, divided by the size
// of one unit in the last place (ULP) derived from the exponent of `oc`.
// NOTE(review): `ot` is presumably the tested output and `oc` the correct
// (reference) value - confirm against callers.
//
// Parameters:
//   ot, oc          - values to compare; the ULP size is computed from oc
//   nbmant          - one ULP is taken as 2^(ilogb(oc) + 1 - nbmant)
//                     (presumably the number of mantissa bits - TODO confirm)
//   fltmin          - lower bound on the ULP size; |oc| < fltmin/2 counts as zero
//   fltmax          - |oc| > fltmax counts as infinite
//   checkSignedZero - when true, two zeros with different signs are reported
//   abound          - absolute bound; an error below it yields 0
//
// Returns the error in ULPs, or one of these special values:
//   0        both NaN; or oc counts as infinite and ot is an infinity of the
//            same sign; or the absolute error is below abound
//   10001    exactly one of oc / ot is NaN
//   10000    oc counts as zero but ot != 0
//   10002    checkSignedZero is set, both are zero, and their signs differ
//   INFINITY oc is infinite but ot is not
template<typename T>
126+
static double countULP(T ot, const T& oc,
127+
const int nbmant, const T& fltmin, const T& fltmax,
128+
const bool checkSignedZero=false, const double abound=0.0) {
129+
if (isnan_(oc) && isnan_(ot)) return 0;
130+
if (isnan_(oc) || isnan_(ot)) return 10001;
131+
if (isinf_(oc) && !isinf_(ot)) return INFINITY;
132+
133+
// Classify oc against the range [fltmin/2, fltmax] instead of comparing it
// with exact zero or infinity.
const T halffltmin = mul_(fltmin, T(0.5));
134+
const bool ciszero = fabs_(oc) < halffltmin, cisinf = fabs_(oc) > fltmax;
135+
136+
if (cisinf && isinf_(ot) && signbit_(oc) == signbit_(ot)) return 0;
137+
if (ciszero && ot != 0) return 10000;
138+
if (checkSignedZero && ciszero && ot == 0 && signbit_(oc) != signbit_(ot)) return 10002;
139+
140+
// When ot is infinite but oc is not, ot is replaced by fltmax with ot's sign
// and 1 is added to the final result.
double v = 0;
141+
if (isinf_(ot) && !isinf_(oc)) {
142+
ot = copysign_(fltmax, ot);
143+
v = 1;
144+
}
145+
146+
// NOTE(review): this point is reachable with oc == 0 (when ot == 0 too);
// what ilogb_ returns for zero is not visible here - confirm that
// `ec + 1 - nbmant` cannot overflow in that case.
const int ec = ilogb_(oc);
147+
148+
auto e = fabs_(oc - ot);
149+
if (e < abound) return 0;
150+
151+
// The divisor is one ULP at oc's exponent, but never smaller than fltmin.
return double(div_(e, fmax_(ldexp_(T(1), ec + 1 - nbmant), fltmin))) + v;
152+
}
153+
#endif

src/dft-tester/naivetest.c

+3-8
Original file line numberDiff line numberDiff line change
@@ -126,20 +126,12 @@ int check_cf(int n) {
126126
//
127127

128128
int success = 1;
129-
double rmsn = 0, rmsd = 0;
130129

131130
for(i=0;i<n;i++) {
132131
if ((fabs(sy[(i*2+0)] - creal(fs[i])) > THRES) ||
133132
(fabs(sy[(i*2+1)] - cimag(fs[i])) > THRES)) {
134133
success = 0;
135134
}
136-
137-
double t;
138-
t = (sy[(i*2+0)] - creal(fs[i]));
139-
rmsn += t*t;
140-
t = (sy[(i*2+1)] - cimag(fs[i]));
141-
rmsn += t*t;
142-
rmsd += creal(fs[i]) * creal(fs[i]) + cimag(fs[i]) * cimag(fs[i]);
143135
}
144136

145137
//
@@ -376,6 +368,9 @@ int check_arf(int n) {
376368

377369
//
378370

371+
free(fs);
372+
free(ts);
373+
379374
Sleef_free(sx);
380375
Sleef_free(sy);
381376
SleefDFT_dispose(p);

src/dft/dftcommon.c

+8
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,14 @@ void freeTables(SleefDFT *p) {
109109
free(p->tbl[N]);
110110
p->tbl[N] = NULL;
111111
}
112+
113+
for(int i=0;i<p->nThread;i++) {
114+
Sleef_free(p->x1[i]);
115+
Sleef_free(p->x0[i]);
116+
}
117+
118+
free(p->x1);
119+
free(p->x0);
112120
}
113121

114122
EXPORT void SleefDFT_dispose(SleefDFT *p) {

src/libm-tester/CMakeLists.txt

+5-5
Original file line numberDiff line numberDiff line change
@@ -258,7 +258,7 @@ macro(test_extension SIMD)
258258
PRIVATE ${FLAGS_ENABLE_${SIMD}})
259259
target_compile_definitions(${TARGET_TESTER4_${SIMD}}
260260
PRIVATE ENABLE_${SIMD}=1 ${COMMON_TARGET_DEFINITIONS} TLFLOAT_ENABLE_INLINING=1)
261-
target_link_libraries(${TARGET_TESTER4_${SIMD}} ${TARGET_LIBSLEEF} ${TLFLOAT_LIBRARIES})
261+
target_link_libraries(${TARGET_TESTER4_${SIMD}} ${TARGET_LIBSLEEF} ${TLFLOAT_LIBRARIES} ${TARGET_TESTERUTIL_OBJ})
262262
if (FORCE_AAVPCS)
263263
target_compile_definitions(${TARGET_TESTER4_${SIMD}} PRIVATE ENABLE_AAVPCS=1)
264264
endif(FORCE_AAVPCS)
@@ -282,7 +282,7 @@ macro(test_extension SIMD)
282282
PRIVATE ${FLAGS_ENABLE_${SIMD}})
283283
target_compile_definitions(${TARGET_TESTER4Y_${SIMD}}
284284
PRIVATE ENABLE_${SIMD}=1 ${COMMON_TARGET_DEFINITIONS} DETERMINISTIC=1 TLFLOAT_ENABLE_INLINING=1)
285-
target_link_libraries(${TARGET_TESTER4Y_${SIMD}} ${TARGET_LIBSLEEF} ${TLFLOAT_LIBRARIES})
285+
target_link_libraries(${TARGET_TESTER4Y_${SIMD}} ${TARGET_LIBSLEEF} ${TLFLOAT_LIBRARIES} ${TARGET_TESTERUTIL_OBJ})
286286
add_dependencies(${TARGET_TESTER4Y_${SIMD}} ${TARGET_HEADERS})
287287
add_dependencies(${TARGET_TESTER4Y_${SIMD}} ${TARGET_LIBSLEEF})
288288
add_dependencies(${TARGET_TESTER4Y_${SIMD}} ext_tlfloat)
@@ -301,7 +301,7 @@ macro(test_extension SIMD)
301301
add_executable(${TARGET_TESTER4I_${SIMD}} ${TESTER4_SRC})
302302
target_compile_options(${TARGET_TESTER4I_${SIMD}}
303303
PRIVATE ${FLAGS_ENABLE_${SIMD}})
304-
target_link_libraries(${TARGET_TESTER4I_${SIMD}} ${TLFLOAT_LIBRARIES})
304+
target_link_libraries(${TARGET_TESTER4I_${SIMD}} ${TLFLOAT_LIBRARIES} ${TARGET_TESTERUTIL_OBJ})
305305
if(CMAKE_C_COMPILER_ID MATCHES "GNU")
306306
target_compile_options(${TARGET_TESTER4I_${SIMD}} PRIVATE "-Wno-unknown-pragmas")
307307
endif()
@@ -513,7 +513,7 @@ if (SLEEF_ARCH_X86)
513513
target_compile_definitions(tester4dsp128 PRIVATE
514514
ENABLE_DSP128=1 ${COMMON_TARGET_DEFINITIONS} TLFLOAT_ENABLE_INLINING=1)
515515
target_compile_options(tester4dsp128 PRIVATE ${FLAGS_ENABLE_SSE2})
516-
target_link_libraries(tester4dsp128 ${TARGET_LIBSLEEF} ${TLFLOAT_LIBRARIES})
516+
target_link_libraries(tester4dsp128 ${TARGET_LIBSLEEF} ${TLFLOAT_LIBRARIES} ${TARGET_TESTERUTIL_OBJ})
517517
add_dependencies(tester4dsp128 ${TARGET_HEADERS} ${TARGET_LIBSLEEF} ext_tlfloat)
518518
add_test_with_emu(1.0 tester4dsp128)
519519

@@ -522,7 +522,7 @@ if (SLEEF_ARCH_X86)
522522
target_compile_definitions(tester4dsp256 PRIVATE
523523
ENABLE_DSP256=1 ${COMMON_TARGET_DEFINITIONS} TLFLOAT_ENABLE_INLINING=1)
524524
target_compile_options(tester4dsp256 PRIVATE ${FLAGS_ENABLE_AVX})
525-
target_link_libraries(tester4dsp256 ${TARGET_LIBSLEEF} ${TLFLOAT_LIBRARIES})
525+
target_link_libraries(tester4dsp256 ${TARGET_LIBSLEEF} ${TLFLOAT_LIBRARIES} ${TARGET_TESTERUTIL_OBJ})
526526
add_dependencies(tester4dsp256 ${TARGET_HEADERS} ${TARGET_LIBSLEEF} ext_tlfloat)
527527
add_test_with_emu(1.0 tester4dsp256)
528528
endif(SLEEF_ENABLE_TESTER4 AND TLFLOAT_LIBRARIES)

src/libm-tester/tester2qp.c

+8-8
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,7 @@ int main(int argc,char **argv)
165165
mpfr_inits(fra, frb, frc, frd, frw, frx, fry, frz, NULL);
166166

167167
conv_t cd;
168-
Sleef_quad d, t, d2, zo;
168+
Sleef_quad d, t; //, d2, zo;
169169

170170
int cnt, ecnt = 0;
171171

@@ -178,26 +178,26 @@ int main(int argc,char **argv)
178178
printf("%g\n", countULP2(cd.d, frx));
179179
#endif
180180

181-
const Sleef_quad rangemax = 1e+9;
181+
//const Sleef_quad rangemax = 1e+9;
182182

183183
for(cnt = 0;ecnt < 1000;cnt++) {
184184
switch(cnt & 7) {
185185
case 0:
186186
d = rnd();
187-
d2 = rnd();
188-
zo = rnd();
187+
//d2 = rnd();
188+
//zo = rnd();
189189
break;
190190
case 1:
191191
cd.d = rint((2 * (double)random() / RAND_MAX - 1) * 1e+10) * M_PI_4;
192192
cd.u128 += (random() & 0xff) - 0x7f;
193193
d = cd.d;
194-
d2 = rnd();
195-
zo = rnd();
194+
//d2 = rnd();
195+
//zo = rnd();
196196
break;
197197
default:
198198
d = rnd_fr();
199-
d2 = rnd_fr();
200-
zo = rnd_zo();
199+
//d2 = rnd_fr();
200+
//zo = rnd_zo();
201201
break;
202202
}
203203

0 commit comments

Comments
 (0)