From 7fbc7000a9351755a52f624554c95bc0a0a7f89b Mon Sep 17 00:00:00 2001
From: Marco Barbone <mbarbone@flatironinstitute.org>
Date: Fri, 24 Oct 2025 10:51:01 -0400
Subject: [PATCH] Fix 740

---
 devel/foldrescale.cpp                         |   3 +
 docs/opts.rst                                 |  12 +-
 examples/cuda/example2d1many.cpp              |   6 +-
 examples/cuda/example2d2many.cpp              |   4 +-
 examples/cuda/example2d3many.cpp              |   4 +-
 examples/cuda/getting_started.cpp             |  96 ++++-----
 examples/guru1d1.cpp                          |  40 ++--
 examples/guru1d1f.cpp                         |  40 ++--
 examples/guru2d1.cpp                          |  44 ++--
 examples/guru2d1_adjoint.cpp                  |  44 ++--
 examples/gurumany1d1.cpp                      |  37 ++--
 examples/many1d1.cpp                          |  25 ++-
 examples/simple1d1.cpp                        |  23 +-
 examples/simple1d1f.cpp                       |  25 ++-
 examples/simple2d1.cpp                        |  39 ++--
 examples/simulplans1d1.cpp                    |  43 ++--
 examples/spreadinterponly1d.cpp               |  41 ++--
 examples/threadsafe1d1.cpp                    |  25 ++-
 examples/threadsafe2d2f.cpp                   |   1 -
 include/cufinufft/spreadinterp.h              |  11 +-
 perftest/big2d2f.cpp                          |   1 -
 perftest/guru_timing_test.cpp                 | 196 +++++++++++-------
 perftest/manysmallprobs.cpp                   |  58 +++---
 perftest/spreadtestnd.cpp                     | 131 ++++++------
 src/c_interface.cpp                           |   5 +-
 src/cuda/deconvolve_wrapper.cu                |  25 +--
 src/cuda/spreadinterp.cpp                     |   4 +-
 src/fft.cpp                                   |   6 +-
 src/spreadinterp.cpp                          |  23 +-
 test/adjointness.cpp                          |  58 +++---
 test/basicpassfail.cpp                        |   9 +-
 test/cuda/cufinufft1d_test.cu                 |  17 +-
 test/cuda/cufinufft1dspreadinterponly_test.cu |   4 +-
 test/cuda/cufinufft2d1nupts_test.cu           |  12 +-
 test/cuda/cufinufft2d_test.cu                 |  18 +-
 test/cuda/cufinufft2dmany_test.cu             |  19 +-
 test/cuda/cufinufft3d_test.cu                 |  18 +-
 test/dumbinputs.cpp                           |  52 ++---
 test/finufft1d_test.cpp                       |  33 +--
 test/finufft1dmany_test.cpp                   |  34 +--
 test/finufft2d_test.cpp                       |  30 +--
 test/finufft2dmany_test.cpp                   |  34 +--
 test/finufft3d_test.cpp                       |  32 +--
 test/finufft3dkernel_test.cpp                 |  10 +-
 test/finufft3dmany_test.cpp                   |  33 +--
 test/spreadinterp1d_test.cpp                  |  23 +-
 test/testutils.cpp                            |   8 +-
 47 files changed, 760 insertions(+), 696 deletions(-)

diff --git a/devel/foldrescale.cpp b/devel/foldrescale.cpp
index a84e3f4e4..cb1735e6a 100644
--- a/devel/foldrescale.cpp
+++ b/devel/foldrescale.cpp
@@ -15,6 +15,9 @@
    The macro wins hands-down on i7, even for modern GCC9.
    This should be done in C++ not as a macro, someday.
 */
+
+using finufft::common::INV_2PI;
+
 #define FOLDRESCALE(x, N, p)                                                \
   (p ? (x + (x >= -PI ? (x < PI ? PI : -PI) : 3 * PI)) * ((FLT)INV_2PI * N) \
      : (x >= 0.0 ? (x < (FLT)N ? x : x - (FLT)N) : x + (FLT)N))
diff --git a/docs/opts.rst b/docs/opts.rst
index 70817037b..8eb9b97ce 100644
--- a/docs/opts.rst
+++ b/docs/opts.rst
@@ -238,17 +238,15 @@ However, with FFTW as the FFT library, FINUFFT is thread safe so long as no othe
   #include <finufft.h>
   #include <omp.h>
 
-  using namespace std;
-
   constexpr int N = 65384;
 
-  void locker(void *lck) { reinterpret_cast<recursive_mutex *>(lck)->lock(); }
-  void unlocker(void *lck) { reinterpret_cast<recursive_mutex *>(lck)->unlock(); }
+  void locker(void *lck) { reinterpret_cast<std::recursive_mutex *>(lck)->lock(); }
+  void unlocker(void *lck) { reinterpret_cast<std::recursive_mutex *>(lck)->unlock(); }
 
   int main() {
     int64_t Ns[3]; // guru describes mode array by vector [N1,N2..]
     Ns[0] = N;
-    recursive_mutex lck;
+    std::recursive_mutex lck;
 
     finufft_opts opts;
     finufft_default_opts(&opts);
@@ -259,7 +257,7 @@ However, with FFTW as the FFT library, FINUFFT is thread safe so long as no othe
     opts.fftw_lock_data = reinterpret_cast<void *>(&lck);
 
     // random nonuniform points (x) and complex strengths (c)
-    vector<complex<double>> c(N);
+    std::vector<std::complex<double>> c(N);
 
     // init FFTW threads
     fftw_init_threads();
@@ -268,7 +266,7 @@ However, with FFTW as the FFT library, FINUFFT is thread safe so long as no othe
     #pragma omp parallel for
     for (int j = 0; j < 100; ++j) {
       // allocate output array for FFTW...
-      vector<complex<double>> F1(N);
+      std::vector<std::complex<double>> F1(N);
 
       // FFTW plan
       lck.lock();
diff --git a/examples/cuda/example2d1many.cpp b/examples/cuda/example2d1many.cpp
index 3423a6fac..b35685817 100644
--- a/examples/cuda/example2d1many.cpp
+++ b/examples/cuda/example2d1many.cpp
@@ -100,12 +100,12 @@ int main(int argc, char *argv[])
     std::complex<float> Ft = std::complex<float>(0, 0),
                         J  = std::complex<float>(0, 1) * (float)iflag;
     for (auto j = 0UL; j < M; ++j)
-      Ft += c[j + i * M] * exp(J * (nt1 * x[j] + nt2 * y[j])); // crude direct
+      Ft += c[j + i * M] * std::exp(J * (nt1 * x[j] + nt2 * y[j])); // crude direct
     int it = N1 / 2 + nt1 + N1 * (N2 / 2 + nt2); // index in complex F as 1d array
     printf("[gpu %3d] one mode: abs err in F[%d,%d] is %.3g\n", i, nt1, nt2,
-           abs(Ft - fk[it + i * N]));
+           std::abs(Ft - fk[it + i * N]));
     printf("[gpu %3d] one mode: rel err in F[%d,%d] is %.3g\n", i, nt1, nt2,
-           abs(Ft - fk[it + i * N]) / infnorm(N, fk + i * N));
+           std::abs(Ft - fk[it + i * N]) / infnorm(N, fk + i * N));
   }
 
   cudaFreeHost(x);
diff --git a/examples/cuda/example2d2many.cpp b/examples/cuda/example2d2many.cpp
index 95918b814..57fceec20 100644
--- a/examples/cuda/example2d2many.cpp
+++ b/examples/cuda/example2d2many.cpp
@@ -109,10 +109,10 @@ int main(int argc, char *argv[])
     int m = 0;
     for (int m2 = -(N2 / 2); m2 <= (N2 - 1) / 2; ++m2) // loop in correct order over F
       for (int m1 = -(N1 / 2); m1 <= (N1 - 1) / 2; ++m1)
-        ct += fkstart[m++] * exp(J * (m1 * x[jt] + m2 * y[jt])); // crude direct
+        ct += fkstart[m++] * std::exp(J * (m1 * x[jt] + m2 * y[jt])); // crude direct
 
     printf("[gpu %3d] one targ: rel err in c[%d] is %.3g\n", t, jt,
-           abs(cstart[jt] - ct) / infnorm(M, c));
+           std::abs(cstart[jt] - ct) / infnorm(M, c));
   }
 
   cudaFreeHost(x);
diff --git a/examples/cuda/example2d3many.cpp b/examples/cuda/example2d3many.cpp
index 3ff17c541..c362d09c3 100644
--- a/examples/cuda/example2d3many.cpp
+++ b/examples/cuda/example2d3many.cpp
@@ -112,10 +112,10 @@ int main(int argc, char *argv[])
     int jt  = N / 2; // check arbitrary choice of one targ pt
     std::complex<double> J(0, iflag * 1);
     std::complex<double> fkt(0, 0);
-    for (int m = 0; m < M; m++) fkt += cstart[m] * exp(J * (s[jt] * x[m] + t[jt] * y[m]));
+    for (int m = 0; m < M; m++) fkt += cstart[m] * std::exp(J * (s[jt] * x[m] + t[jt] * y[m]));
 
     printf("[gpu %3d] one targ: rel err in c[%d] is %.3g\n", tr, jt,
-           abs(fkstart[jt] - fkt) / infnorm(N, fkstart));
+           std::abs(fkstart[jt] - fkt) / infnorm(N, fkstart));
   }
 
   cudaFreeHost(x);
diff --git a/examples/cuda/getting_started.cpp b/examples/cuda/getting_started.cpp
index c09a68dcd..18643dfff 100644
--- a/examples/cuda/getting_started.cpp
+++ b/examples/cuda/getting_started.cpp
@@ -1,31 +1,19 @@
 /*
-
-  Simple example of the 1D type-1 transform. To compile, run
-
-       nvcc -o getting_started getting_started.cpp -lcufinufft
-
-  followed by
-
-       ./getting_started
-
-  with the necessary paths set if the library is not installed in the standard
-  directories. If the library has been compiled in the standard way, this means
-
-       export CPATH="${CPATH:+${CPATH}:}../../include"
-       export LIBRARY_PATH="${LIBRARY_PATH:+${LIBRARY_PATH}:}../../build"
-       export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}../../build"
-
- */
-
-#include <complex.h>
+  Simple example of the 1D type-1 transform using std::complex.
+  To compile:
+      nvcc -o getting_started getting_started.cpp -lcufinufft
+*/
+
+#include <cmath>
+#include <complex>
+#include <cstdio>
+#include <cstdlib>
 #include <cuComplex.h>
 #include <cuda_runtime.h>
 #include <cufinufft.h>
-#include <math.h>
-#include <stdio.h>
-#include <stdlib.h>
+#include <vector>
 
-static const double PI = 3.141592653589793238462643383279502884;
+static constexpr double PI = 3.141592653589793238462643383279502884;
 
 int main() {
   // Problem size: number of nonuniform points (M) and grid size (N).
@@ -35,9 +23,9 @@ int main() {
   int64_t modes[1] = {N};
 
   // Host pointers: frequencies (x), coefficients (c), and output (f).
-  float *x;
-  float _Complex *c;
-  float _Complex *f;
+  std::vector<float> x(M);
+  std::vector<std::complex<float>> c(M);
+  std::vector<std::complex<float>> f(N);
 
   // Device pointers.
   float *d_x;
@@ -48,71 +36,61 @@ int main() {
 
   // Manual calculation at a single point idx.
   int idx;
-  float _Complex f0;
-
-  // Allocate the host arrays.
-  x = (float *)malloc(M * sizeof(float));
-  c = (float _Complex *)malloc(M * sizeof(float _Complex));
-  f = (float _Complex *)malloc(N * sizeof(float _Complex));
-
-  // Fill with random numbers. Frequencies must be in the interval [-pi, pi)
-  // while strengths can be any value.
-  srand(0);
+  std::complex<float> f0;
 
+  // Fill with random numbers.
+  std::srand(42);
   for (int j = 0; j < M; ++j) {
-    x[j] = 2 * PI * (((float)rand()) / RAND_MAX - 1);
-    c[j] =
-        (2 * ((float)rand()) / RAND_MAX - 1) + I * (2 * ((float)rand()) / RAND_MAX - 1);
+    x[j]     = 2 * PI * ((float)std::rand() / RAND_MAX - 1);
+    float re = 2 * ((float)std::rand()) / RAND_MAX - 1;
+    float im = 2 * ((float)std::rand()) / RAND_MAX - 1;
+    c[j]     = std::complex<float>(re, im);
   }
 
-  // Allocate the device arrays and copy the x and c arrays.
+  // Allocate the device arrays and copy x and c.
   cudaMalloc(&d_x, M * sizeof(float));
-  cudaMalloc(&d_c, M * sizeof(float _Complex));
-  cudaMalloc(&d_f, N * sizeof(float _Complex));
+  cudaMalloc(&d_c, M * sizeof(cuFloatComplex));
+  cudaMalloc(&d_f, N * sizeof(cuFloatComplex));
+
+  std::vector<cuFloatComplex> c_dev(M);
+  for (int j = 0; j < M; ++j) c_dev[j] = make_cuFloatComplex(c[j].real(), c[j].imag());
 
-  cudaMemcpy(d_x, x, M * sizeof(float), cudaMemcpyHostToDevice);
-  cudaMemcpy(d_c, c, M * sizeof(float _Complex), cudaMemcpyHostToDevice);
+  cudaMemcpy(d_x, x.data(), M * sizeof(float), cudaMemcpyHostToDevice);
+  cudaMemcpy(d_c, c_dev.data(), M * sizeof(cuFloatComplex), cudaMemcpyHostToDevice);
 
-  // Make the cufinufft plan for a 1D type-1 transform with six digits of
-  // tolerance.
-  cufinufftf_makeplan(1, 1, modes, 1, 1, 1e-6, &plan, NULL);
+  // Make the cufinufft plan for 1D type-1 transform.
+  cufinufftf_makeplan(1, 1, modes, 1, 1, 1e-6, &plan, nullptr);
 
   // Set the frequencies of the nonuniform points.
-  cufinufftf_setpts(plan, M, d_x, NULL, NULL, 0, NULL, NULL, NULL);
+  cufinufftf_setpts(plan, M, d_x, nullptr, nullptr, 0, nullptr, nullptr, nullptr);
 
   // Actually execute the plan on the given coefficients and store the result
   // in the d_f array.
   cufinufftf_execute(plan, d_c, d_f);
 
   // Copy the result back onto the host.
-  cudaMemcpy(f, d_f, N * sizeof(float _Complex), cudaMemcpyDeviceToHost);
+  cudaMemcpy(f.data(), d_f, N * sizeof(cuFloatComplex), cudaMemcpyDeviceToHost);
 
   // Destroy the plan and free the device arrays after we're done.
   cufinufftf_destroy(plan);
-
   cudaFree(d_x);
   cudaFree(d_c);
   cudaFree(d_f);
 
   // Pick an index to check the result of the calculation.
   idx = 4 * N / 7;
-
-  printf("f[%d] = %lf + %lfi\n", idx, crealf(f[idx]), cimagf(f[idx]));
+  printf("f[%d] = %lf + %lfi\n", idx, std::real(f[idx]), std::imag(f[idx]));
 
   // Calculate the result manually using the formula for the type-1
   // transform.
   f0 = 0;
 
+  std::complex<float> I(-0.0, 1.0);
   for (int j = 0; j < M; ++j) {
-    f0 += c[j] * cexp(I * x[j] * (idx - N / 2));
+    f0 += c[j] * std::exp(I * x[j] * float(idx - N / 2));
   }
 
-  printf("f0[%d] = %lf + %lfi\n", idx, crealf(f0), cimagf(f0));
-
-  // Finally free the host arrays.
-  free(x);
-  free(c);
-  free(f);
+  printf("f0[%d] = %lf + %lfi\n", idx, std::real(f0), std::imag(f0));
 
   return 0;
 }
diff --git a/examples/guru1d1.cpp b/examples/guru1d1.cpp
index bd18c2526..4afef5837 100644
--- a/examples/guru1d1.cpp
+++ b/examples/guru1d1.cpp
@@ -4,17 +4,13 @@
 
 // specific to this example...
 #include <cassert>
-#include <math.h>
-#include <stdio.h>
-#include <stdlib.h>
+#include <cmath>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
 #include <vector>
 
-// only good for small projects...
-using namespace std;
-
 static const double PI = 3.141592653589793238462643383279502884;
-// allows 1i to be the imaginary unit... (C++14 onwards)
-using namespace std::complex_literals;
 
 int main(int argc, char *argv[])
 /* Example calling guru C++ interface to FINUFFT library, passing
@@ -42,27 +38,31 @@ int main(int argc, char *argv[])
   } else            // or, NULL here means use default opts...
     finufft_makeplan(type, dim, Ns, +1, ntransf, tol, &plan, NULL);
 
+  const std::complex<double> I(0.0, 1.0);
+
   // generate some random nonuniform points
-  vector<double> x(M);
+  std::vector<double> x(M);
   for (int j = 0; j < M; ++j)
-    x[j] = PI * (2 * ((double)rand() / RAND_MAX) - 1); // uniform random in [-pi,pi)
+    x[j] = PI * (2 * ((double)std::rand() / RAND_MAX) - 1); // uniform random in [-pi,pi)
   // note FINUFFT doesn't use std::vector types, so we need to make a pointer...
   finufft_setpts(plan, M, x.data(), NULL, NULL, 0, NULL, NULL, NULL);
 
   // generate some complex strengths
-  vector<complex<double>> c(M);
+  std::vector<std::complex<double>> c(M);
   for (int j = 0; j < M; ++j)
-    c[j] =
-        2 * ((double)rand() / RAND_MAX) - 1 + 1i * (2 * ((double)rand() / RAND_MAX) - 1);
+    c[j] = 2 * ((double)std::rand() / RAND_MAX) - 1 +
+           std::complex<double>(0.0, 1.0) *
+               (2 * ((double)std::rand() / RAND_MAX) - 1);
 
   // alloc output array for the Fourier modes, then do the transform
-  vector<complex<double>> F(N);
+  std::vector<std::complex<double>> F(N);
   int ier = finufft_execute(plan, c.data(), F.data());
 
   // for fun, do another with same NU pts (no re-sorting), but new strengths...
   for (int j = 0; j < M; ++j)
-    c[j] =
-        2 * ((double)rand() / RAND_MAX) - 1 + 1i * (2 * ((double)rand() / RAND_MAX) - 1);
+    c[j] = 2 * ((double)std::rand() / RAND_MAX) - 1 +
+           std::complex<double>(0.0, 1.0) *
+               (2 * ((double)std::rand() / RAND_MAX) - 1);
   ier = finufft_execute(plan, c.data(), F.data());
 
   finufft_destroy(plan); // don't forget! done with transforms of this size
@@ -70,15 +70,15 @@ int main(int argc, char *argv[])
   // rest is math checking and reporting...
   int n = 142519;                                   // check the answer just for this mode
   assert(n >= -(double)N / 2 && n < (double)N / 2); // ensure meaningful test
-  complex<double> Ftest = complex<double>(0, 0);
-  for (int j = 0; j < M; ++j) Ftest += c[j] * exp(1i * (double)n * x[j]);
+  std::complex<double> Ftest(0, 0);
+  for (int j = 0; j < M; ++j) Ftest += c[j] * std::exp(I * (double)n * x[j]);
   int nout    = n + N / 2; // index in output array for freq mode n
   double Fmax = 0.0;       // compute inf norm of F
   for (int m = 0; m < N; ++m) {
-    double aF = abs(F[m]);
+    double aF = std::abs(F[m]);
     if (aF > Fmax) Fmax = aF;
   }
-  double err = abs(F[nout] - Ftest) / Fmax;
+  double err = std::abs(F[nout] - Ftest) / Fmax;
   printf("guru 1D type-1 double-prec NUFFT done. ier=%d, rel err in F[%d] is %.3g\n", ier,
          n, err);
 
diff --git a/examples/guru1d1f.cpp b/examples/guru1d1f.cpp
index 96626f897..dbaa667fe 100644
--- a/examples/guru1d1f.cpp
+++ b/examples/guru1d1f.cpp
@@ -3,17 +3,13 @@
 #include <finufft.h>
 
 // specific to this example...
-#include <math.h>
-#include <stdio.h>
-#include <stdlib.h>
+#include <cmath>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
 #include <vector>
 
-// only good for small projects...
-using namespace std;
-
 static const double PI = 3.141592653589793238462643383279502884;
-// allows 1i to be the imaginary unit... (C++14 onwards)
-using namespace std::complex_literals;
 
 int main(int argc, char *argv[])
 /* Example calling guru C++ interface to FINUFFT library, single-prec, passing
@@ -40,42 +36,44 @@ int main(int argc, char *argv[])
   } else                          // or, NULL here means use default opts...
     finufftf_makeplan(type, dim, Ns, +1, ntransf, tol, &plan, NULL);
 
+  const std::complex<float> I(0.0f, 1.0f);
+
   // generate some random nonuniform points
-  vector<float> x(M);
+  std::vector<float> x(M);
   for (int j = 0; j < M; ++j)
-    x[j] = PI * (2 * ((float)rand() / RAND_MAX) - 1); // uniform random in [-pi,pi)
+    x[j] = PI * (2 * ((float)std::rand() / RAND_MAX) - 1); // uniform random in [-pi,pi)
   // note FINUFFT doesn't use std::vector types, so we need to make a pointer...
-  finufftf_setpts(plan, M, &x[0], NULL, NULL, 0, NULL, NULL, NULL);
+  finufftf_setpts(plan, M, &x[0], nullptr, nullptr, 0, nullptr, nullptr, nullptr);
 
   // generate some complex strengths
-  vector<complex<float>> c(M);
+  std::vector<std::complex<float>> c(M);
   for (int j = 0; j < M; ++j)
-    c[j] =
-        2 * ((float)rand() / RAND_MAX) - 1 + 1if * (2 * ((float)rand() / RAND_MAX) - 1);
+    c[j] = 2 * ((float)std::rand() / RAND_MAX) - 1 +
+           I * (2 * ((float)std::rand() / RAND_MAX) - 1);
 
   // alloc output array for the Fourier modes, then do the transform
-  vector<complex<float>> F(N);
+  std::vector<std::complex<float>> F(N);
   int ier = finufftf_execute(plan, &c[0], &F[0]);
 
   // for fun, do another with same NU pts (no re-sorting), but new strengths...
   for (int j = 0; j < M; ++j)
-    c[j] =
-        2 * ((float)rand() / RAND_MAX) - 1 + 1if * (2 * ((float)rand() / RAND_MAX) - 1);
+    c[j] = 2 * ((float)std::rand() / RAND_MAX) - 1 +
+           I * (2 * ((float)std::rand() / RAND_MAX) - 1);
   ier = finufftf_execute(plan, &c[0], &F[0]);
 
   finufftf_destroy(plan); // done with transforms of this size
 
   // rest is math checking and reporting...
   int n = 12519; // check the answer just for this mode, must be in [-N/2,N/2)
-  complex<float> Ftest = complex<float>(0, 0);
-  for (int j = 0; j < M; ++j) Ftest += c[j] * exp(1if * (float)n * x[j]);
+  std::complex<float> Ftest = std::complex<float>(0, 0);
+  for (int j = 0; j < M; ++j) Ftest += c[j] * std::exp(I * (float)n * x[j]);
   int nout   = n + N / 2; // index in output array for freq mode n
   float Fmax = 0.0;       // compute inf norm of F
   for (int m = 0; m < N; ++m) {
-    float aF = abs(F[m]);
+    float aF = std::abs(F[m]);
     if (aF > Fmax) Fmax = aF;
   }
-  float err = abs(F[nout] - Ftest) / Fmax;
+  float err = std::abs(F[nout] - Ftest) / Fmax;
   printf("guru 1D type-1 single-prec NUFFT done. ier=%d, rel err in F[%d] is %.3g\n", ier,
          n, err);
 
diff --git a/examples/guru2d1.cpp b/examples/guru2d1.cpp
index 91556bf77..ef2e5f6ca 100644
--- a/examples/guru2d1.cpp
+++ b/examples/guru2d1.cpp
@@ -1,10 +1,12 @@
 #include <finufft.h>
 
+#include <cmath>
 #include <complex>
+#include <cstdint>
+#include <cstdlib>
 #include <iomanip>
 #include <iostream>
 #include <vector>
-using namespace std;
 
 static const double PI = 3.141592653589793238462643383279502884;
 
@@ -20,34 +22,34 @@ int main(int argc, char *argv[]) {
   finufft_opts opts;
   finufft_default_opts(&opts);
   opts.upsampfac = 1.25;
-  complex<double> I(0.0, 1.0); // the imaginary unit
+  std::complex<double> I(0.0, 1.0); // the imaginary unit
 
   // generate random non-uniform points on (x,y) and complex strengths (c):
-  vector<double> x(M), y(M);
-  vector<complex<double>> c(M);
+  std::vector<double> x(M), y(M);
+  std::vector<std::complex<double>> c(M);
 
   for (int i = 0; i < M; i++) {
-    x[i] = PI * (2 * (double)rand() / RAND_MAX - 1); // uniform random in [-pi, pi)
-    y[i] = PI * (2 * (double)rand() / RAND_MAX - 1); // uniform random in [-pi, pi)
+    x[i] = PI * (2 * (double)std::rand() / RAND_MAX - 1); // uniform random in [-pi, pi)
+    y[i] = PI * (2 * (double)std::rand() / RAND_MAX - 1); // uniform random in [-pi, pi)
     // each component uniform random in [-1,1]
-    c[i] =
-        2 * ((double)rand() / RAND_MAX - 1) + I * (2 * ((double)rand() / RAND_MAX) - 1);
+    c[i] = 2 * ((double)std::rand() / RAND_MAX - 1) +
+           I * (2 * ((double)std::rand() / RAND_MAX) - 1);
   }
 
   // choose numbers of output Fourier coefficients in each dimension
-  int N1 = round(2.0 * sqrt(N));
-  int N2 = round(N / N1);
+  int N1 = (int)std::round(2.0 * std::sqrt(N));
+  int N2 = (int)std::round(N / N1);
 
   // output array for the Fourier modes
-  vector<complex<double>> F(N1 * N2);
+  std::vector<std::complex<double>> F(N1 * N2);
 
   int type = 1, dim = 2, ntrans = 1; // you could also do ntrans>1
   int64_t Ns[] = {N1, N2};           // N1,N2 as 64-bit int array
   // step 1: make a plan...
   finufft_plan plan;
-  int ier = finufft_makeplan(type, dim, Ns, +1, ntrans, tol, &plan, NULL);
+  int ier = finufft_makeplan(type, dim, Ns, +1, ntrans, tol, &plan, nullptr);
   // step 2: send in M nonuniform points (just x, y in this case)...
-  finufft_setpts(plan, M, &x[0], &y[0], NULL, 0, NULL, NULL, NULL);
+  finufft_setpts(plan, M, &x[0], &y[0], nullptr, 0, nullptr, nullptr, nullptr);
   // step 3: do the planned transform to the c strength data, output to F...
   finufft_execute(plan, &c[0], &F[0]);
   // ... you could now send in new points, and/or do transforms with new c data
@@ -55,17 +57,17 @@ int main(int argc, char *argv[]) {
   // step 4: free the memory used by the plan...
   finufft_destroy(plan);
 
-  int k1 = round(0.45 * N1); // check the answer for mode frequency (k1,k2)
-  int k2 = round(-0.35 * N2);
+  int k1 = (int)std::round(0.45 * N1); // check the answer for mode frequency (k1,k2)
+  int k2 = (int)std::round(-0.35 * N2);
 
-  complex<double> Ftest(0, 0);
+  std::complex<double> Ftest(0, 0);
   for (int j = 0; j < M; j++)
-    Ftest += c[j] * exp(I * ((double)k1 * x[j] + (double)k2 * y[j]));
+    Ftest += c[j] * std::exp(I * ((double)k1 * x[j] + (double)k2 * y[j]));
 
   // compute inf norm of F
   double Fmax = 0.0;
   for (int m = 0; m < N1 * N2; m++) {
-    double aF = abs(F[m]);
+    double aF = std::abs(F[m]);
     if (aF > Fmax) Fmax = aF;
   }
 
@@ -75,8 +77,8 @@ int main(int argc, char *argv[]) {
   int indexOut = k1out + k2out * (N1);
 
   // compute relative error
-  double err = abs(F[indexOut] - Ftest) / Fmax;
-  cout << "2D type-1 NUFFT done. ier=" << ier << ", err in F[" << indexOut
-       << "] rel to max(F) is " << setprecision(2) << err << endl;
+  double err = std::abs(F[indexOut] - Ftest) / Fmax;
+  std::cout << "2D type-1 NUFFT done. ier=" << ier << ", err in F[" << indexOut
+            << "] rel to max(F) is " << std::setprecision(2) << err << std::endl;
   return ier;
 }
diff --git a/examples/guru2d1_adjoint.cpp b/examples/guru2d1_adjoint.cpp
index 1dc42e11c..44eac27b9 100644
--- a/examples/guru2d1_adjoint.cpp
+++ b/examples/guru2d1_adjoint.cpp
@@ -1,10 +1,12 @@
 #include <finufft.h>
 
+#include <cmath>
 #include <complex>
+#include <cstdint>
+#include <cstdlib>
 #include <iomanip>
 #include <iostream>
 #include <vector>
-using namespace std;
 
 static const double PI = 3.141592653589793238462643383279502884;
 
@@ -25,35 +27,35 @@ int main(int argc, char *argv[]) {
   finufft_opts opts;
   finufft_default_opts(&opts);
   opts.upsampfac = 1.25;
-  complex<double> I(0.0, 1.0); // the imaginary unit
+  std::complex<double> I(0.0, 1.0); // the imaginary unit
 
   // generate random non-uniform points on (x,y) and complex strengths (c):
-  vector<double> x(M), y(M);
-  vector<complex<double>> c(M);
+  std::vector<double> x(M), y(M);
+  std::vector<std::complex<double>> c(M);
 
   for (int i = 0; i < M; i++) {
-    x[i] = PI * (2 * (double)rand() / RAND_MAX - 1); // uniform random in [-pi, pi)
-    y[i] = PI * (2 * (double)rand() / RAND_MAX - 1); // uniform random in [-pi, pi)
+    x[i] = PI * (2 * (double)std::rand() / RAND_MAX - 1); // uniform random in [-pi, pi)
+    y[i] = PI * (2 * (double)std::rand() / RAND_MAX - 1); // uniform random in [-pi, pi)
     // each component uniform random in [-1,1]
-    c[i] =
-        2 * ((double)rand() / RAND_MAX - 1) + I * (2 * ((double)rand() / RAND_MAX) - 1);
+    c[i] = 2 * ((double)std::rand() / RAND_MAX - 1) +
+           I * (2 * ((double)std::rand() / RAND_MAX) - 1);
   }
 
   // choose numbers of output Fourier coefficients in each dimension
-  int N1 = round(2.0 * sqrt(N));
-  int N2 = round(N / N1);
+  int N1 = (int)std::round(2.0 * std::sqrt(N));
+  int N2 = (int)std::round(N / N1);
 
   // output array for the Fourier modes
-  vector<complex<double>> F(N1 * N2);
+  std::vector<std::complex<double>> F(N1 * N2);
 
   int type = 2, dim = 2, ntrans = 1; // you could also do ntrans>1
   int64_t Ns[] = {N1, N2};           // N1,N2 as 64-bit int array
 
   // step 1: make a plan... note we choose isign=-1 for this type 2 plan
   finufft_plan plan;
-  int ier = finufft_makeplan(type, dim, Ns, -1, ntrans, tol, &plan, NULL);
+  int ier = finufft_makeplan(type, dim, Ns, -1, ntrans, tol, &plan, nullptr);
   // step 2: send in M nonuniform points (just x, y in this case)...
-  finufft_setpts(plan, M, &x[0], &y[0], NULL, 0, NULL, NULL, NULL);
+  finufft_setpts(plan, M, &x[0], &y[0], nullptr, 0, nullptr, nullptr, nullptr);
   // step 3: do the adjoint of the planned transform. This maps
   // c strength data, to F output, and is identical to the type 1 with isign=+1.
   finufft_execute_adjoint(plan, &c[0], &F[0]);
@@ -62,17 +64,17 @@ int main(int argc, char *argv[]) {
   // step 4: free the memory used by the plan...
   finufft_destroy(plan);
 
-  int k1 = round(0.45 * N1); // check the answer for mode frequency (k1,k2)
-  int k2 = round(-0.35 * N2);
+  int k1 = (int)std::round(0.45 * N1); // check the answer for mode frequency (k1,k2)
+  int k2 = (int)std::round(-0.35 * N2);
 
-  complex<double> Ftest(0, 0);
+  std::complex<double> Ftest(0, 0);
   for (int j = 0; j < M; j++)
-    Ftest += c[j] * exp(I * ((double)k1 * x[j] + (double)k2 * y[j]));
+    Ftest += c[j] * std::exp(I * ((double)k1 * x[j] + (double)k2 * y[j]));
 
   // compute inf norm of F
   double Fmax = 0.0;
   for (int m = 0; m < N1 * N2; m++) {
-    double aF = abs(F[m]);
+    double aF = std::abs(F[m]);
     if (aF > Fmax) Fmax = aF;
   }
 
@@ -82,8 +84,8 @@ int main(int argc, char *argv[]) {
   int indexOut = k1out + k2out * (N1);
 
   // compute relative error
-  double err = abs(F[indexOut] - Ftest) / Fmax;
-  cout << "2D adjoint-of-type-2 NUFFT done. ier=" << ier << ", err in F[" << indexOut
-       << "] rel to max(F) is " << setprecision(2) << err << endl;
+  double err = std::abs(F[indexOut] - Ftest) / Fmax;
+  std::cout << "2D adjoint-of-type-2 NUFFT done. ier=" << ier << ", err in F[" << indexOut
+            << "] rel to max(F) is " << std::setprecision(2) << err << std::endl;
   return ier;
 }
diff --git a/examples/gurumany1d1.cpp b/examples/gurumany1d1.cpp
index 7fceea176..d453f2042 100644
--- a/examples/gurumany1d1.cpp
+++ b/examples/gurumany1d1.cpp
@@ -11,17 +11,13 @@
 
 // specific to this demo...
 #include <cassert>
-#include <math.h>
-#include <stdio.h>
-#include <stdlib.h>
+#include <cmath>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
 #include <vector>
 
-// only good for small projects...
-using namespace std;
-
 static const double PI = 3.141592653589793238462643383279502884;
-// allows 1i to be the imaginary unit... (C++14 onwards)
-using namespace std::complex_literals;
 
 int main(int argc, char *argv[]) {
   int M      = 2e5;          // number of nonuniform points
@@ -33,22 +29,23 @@ int main(int argc, char *argv[]) {
   int type = 1, dim = 1;     // 1d1
   int64_t Ns[3] = {N, 0, 0}; // guru describes mode array by vector [N1,N2..]
   finufft_plan plan;         // creates a plan struct (NULL below: default opts)
-  finufft_makeplan(type, dim, Ns, isign, ntrans, tol, &plan, NULL);
+  finufft_makeplan(type, dim, Ns, isign, ntrans, tol, &plan, nullptr);
 
   // generate random nonuniform points and pass to FINUFFT
-  vector<double> x(M);
+  std::vector<double> x(M);
   for (int j = 0; j < M; ++j)
-    x[j] = PI * (2 * ((double)rand() / RAND_MAX) - 1); // uniform random in [-pi,pi)
-  finufft_setpts(plan, M, x.data(), NULL, NULL, 0, NULL, NULL, NULL);
+    x[j] = PI * (2 * ((double)std::rand() / RAND_MAX) - 1); // uniform random in [-pi,pi)
+  finufft_setpts(plan, M, x.data(), nullptr, nullptr, 0, nullptr, nullptr, nullptr);
 
   // generate ntrans complex strength vectors each of length M (the slow bit!)
-  vector<complex<double>> c(M * ntrans); // plain contiguous storage
+  std::vector<std::complex<double>> c(M * ntrans); // plain contiguous storage
+  const std::complex<double> I(0.0, 1.0);
   for (int j = 0; j < M * ntrans; ++j)
-    c[j] =
-        2 * ((double)rand() / RAND_MAX) - 1 + 1i * (2 * ((double)rand() / RAND_MAX) - 1);
+    c[j] = 2 * ((double)std::rand() / RAND_MAX) - 1 +
+           I * (2 * ((double)std::rand() / RAND_MAX) - 1);
 
   // alloc output array for the Fourier modes, then do the transform
-  vector<complex<double>> F(N * ntrans);
+  std::vector<std::complex<double>> F(N * ntrans);
   printf("guru many 1D type-1 double-prec, tol=%.3g, executing %d transforms "
          "(vectorized), each size %d NU pts to %d modes...\n",
          tol, ntrans, M, N);
@@ -63,16 +60,16 @@ int main(int argc, char *argv[]) {
   int trans = 71;                                   // ...testing in just this transform
   assert(k >= -(double)N / 2 && k < (double)N / 2); // ensure meaningful test
   assert(trans >= 0 && trans < ntrans);
-  complex<double> Ftest = complex<double>(0, 0);
+  std::complex<double> Ftest = std::complex<double>(0, 0);
   for (int j = 0; j < M; ++j)
-    Ftest += c[j + M * trans] * exp(1i * (double)k * x[j]); // c offset to trans
+    Ftest += c[j + M * trans] * std::exp(I * (double)k * x[j]); // c offset to trans
   double Fmax = 0.0; // compute inf norm of F for selected transform
   for (int m = 0; m < N; ++m) {
-    double aF = abs(F[m + N * trans]);
+    double aF = std::abs(F[m + N * trans]);
     if (aF > Fmax) Fmax = aF;
   }
   int nout   = k + N / 2 + N * trans; // output index for freq mode k in the trans
-  double err = abs(F[nout] - Ftest) / Fmax;
+  double err = std::abs(F[nout] - Ftest) / Fmax;
   printf("\tdone: ier=%d; for transform %d, rel err in F[%d] is %.3g\n", ier, trans, k,
          err);
 
diff --git a/examples/many1d1.cpp b/examples/many1d1.cpp
index c8faef2ca..3dbb861b8 100644
--- a/examples/many1d1.cpp
+++ b/examples/many1d1.cpp
@@ -3,9 +3,8 @@
 #include <cassert>
 #include <complex>
 #include <cstdio>
-#include <stdlib.h>
+#include <cstdlib>
 #include <vector>
-using namespace std;
 
 static const double PI = 3.141592653589793238462643383279502884;
 
@@ -21,18 +20,18 @@ int main(int argc, char *argv[])
   double tol         = 1e-9;             // desired accuracy
   finufft_opts *opts = new finufft_opts; // opts is pointer to struct
   finufft_default_opts(opts);
-  complex<double> I = complex<double>(0.0, 1.0); // the imaginary unit
+  std::complex<double> I = std::complex<double>(0.0, 1.0); // the imaginary unit
 
   // generate some random nonuniform points (x) and complex strengths (c)...
-  vector<double> x(M);
-  vector<complex<double>> c(M * ntrans);
+  std::vector<double> x(M);
+  std::vector<std::complex<double>> c(M * ntrans);
   for (int j = 0; j < M; ++j)
-    x[j] = PI * (2 * ((double)rand() / RAND_MAX) - 1); // uniform random in [-pi,pi)
+    x[j] = PI * (2 * ((double)std::rand() / RAND_MAX) - 1); // uniform random in [-pi,pi)
   for (int j = 0; j < M * ntrans; ++j)                   // fill all ntrans vectors...
-    c[j] =
-        2 * ((double)rand() / RAND_MAX) - 1 + I * (2 * ((double)rand() / RAND_MAX) - 1);
+    c[j] = 2 * ((double)std::rand() / RAND_MAX) - 1 +
+           I * (2 * ((double)std::rand() / RAND_MAX) - 1);
   // allocate output array for the Fourier modes...
-  vector<complex<double>> F(N * ntrans);
+  std::vector<std::complex<double>> F(N * ntrans);
 
   // call the NUFFT (with iflag=+1): note pointers (not STL vecs) passed...
   int ier = finufft1d1many(ntrans, M, &x[0], &c[0], +1, tol, N, &F[0], NULL);
@@ -41,16 +40,16 @@ int main(int argc, char *argv[])
   int trans = ntrans - 1; // ...in this transform
   assert(k >= -(double)N / 2 && k < (double)N / 2);
 
-  complex<double> Ftest = complex<double>(0, 0);           // do the naive calc...
+  std::complex<double> Ftest = std::complex<double>(0, 0);           // do the naive calc...
   for (int j = 0; j < M; ++j)
-    Ftest += c[j + M * trans] * exp(I * (double)k * x[j]); // c from transform # trans
+    Ftest += c[j + M * trans] * std::exp(I * (double)k * x[j]); // c from transform # trans
   double Fmax = 0.0; // compute inf norm of F for transform # trans
   for (int m = 0; m < N; ++m) {
-    double aF = abs(F[m + N * trans]);
+    double aF = std::abs(F[m + N * trans]);
     if (aF > Fmax) Fmax = aF;
   }
   int kout   = k + N / 2 + N * trans; // output index, freq mode k, transform # trans
-  double err = abs(F[kout] - Ftest) / Fmax;
+  double err = std::abs(F[kout] - Ftest) / Fmax;
   printf("1D type-1 double-prec NUFFT done. ier=%d, rel err in F[%d] is %.3g\n", ier, k,
          err);
   return ier;
diff --git a/examples/simple1d1.cpp b/examples/simple1d1.cpp
index b7735fd67..a65c2db00 100644
--- a/examples/simple1d1.cpp
+++ b/examples/simple1d1.cpp
@@ -5,9 +5,8 @@
 #include <cassert>
 #include <complex>
 #include <cstdio>
-#include <stdlib.h>
+#include <cstdlib>
 #include <vector>
-using namespace std;
 
 static const double PI = 3.141592653589793238462643383279502884;
 
@@ -25,33 +24,33 @@ int main(int argc, char *argv[])
   double acc         = 1e-9;                     // desired accuracy
   finufft_opts *opts = new finufft_opts;         // opts is pointer to struct
   finufft_default_opts(opts);
-  complex<double> I = complex<double>(0.0, 1.0); // the imaginary unit
+  std::complex<double> I = std::complex<double>(0.0, 1.0); // the imaginary unit
 
   // generate some random nonuniform points (x) and complex strengths (c)...
-  vector<double> x(M);
-  vector<complex<double>> c(M);
+  std::vector<double> x(M);
+  std::vector<std::complex<double>> c(M);
   for (int j = 0; j < M; ++j) {
-    x[j] = PI * (2 * ((double)rand() / RAND_MAX) - 1); // uniform random in [-pi,pi)
+    x[j] = PI * (2 * ((double)std::rand() / RAND_MAX) - 1); // uniform random in [-pi,pi)
     c[j] =
-        2 * ((double)rand() / RAND_MAX) - 1 + I * (2 * ((double)rand() / RAND_MAX) - 1);
+        2 * ((double)std::rand() / RAND_MAX) - 1 + I * (2 * ((double)std::rand() / RAND_MAX) - 1);
   }
   // allocate output array for the Fourier modes...
-  vector<complex<double>> F(N);
+  std::vector<std::complex<double>> F(N);
 
   // call the NUFFT (with iflag=+1): note pointers (not STL vecs) passed...
   int ier = finufft1d1(M, &x[0], &c[0], +1, acc, N, &F[0], opts);
 
   int k = 142519; // check the answer just for this mode frequency...
   assert(k >= -(double)N / 2 && k < (double)N / 2);
-  complex<double> Ftest = complex<double>(0, 0);
-  for (int j = 0; j < M; ++j) Ftest += c[j] * exp(I * (double)k * x[j]);
+  std::complex<double> Ftest = std::complex<double>(0, 0);
+  for (int j = 0; j < M; ++j) Ftest += c[j] * std::exp(I * (double)k * x[j]);
   double Fmax = 0.0; // compute inf norm of F
   for (int m = 0; m < N; ++m) {
-    double aF = abs(F[m]);
+    double aF = std::abs(F[m]);
     if (aF > Fmax) Fmax = aF;
   }
   int kout   = k + N / 2; // index in output array for freq mode k
-  double err = abs(F[kout] - Ftest) / Fmax;
+  double err = std::abs(F[kout] - Ftest) / Fmax;
   printf("1D type-1 double-prec NUFFT done. ier=%d, rel err in F[%d] is %.3g\n", ier, k,
          err);
   return ier;
diff --git a/examples/simple1d1f.cpp b/examples/simple1d1f.cpp
index 359446be3..8d04a69a8 100644
--- a/examples/simple1d1f.cpp
+++ b/examples/simple1d1f.cpp
@@ -4,10 +4,9 @@
 // also needed for this example...
 #include <cassert>
 #include <complex>
-#include <stdio.h>
-#include <stdlib.h>
+#include <cstdio>
+#include <cstdlib>
 #include <vector>
-using namespace std;
 
 static const double PI = 3.141592653589793238462643383279502884;
 
@@ -23,32 +22,32 @@ int main(int argc, char *argv[])
   float acc          = 1e-3; // desired accuracy
   finufft_opts *opts = new finufft_opts;       // opts is pointer to struct
   finufftf_default_opts(opts);                 // note finufft "f" suffix
-  complex<float> I = complex<float>(0.0, 1.0); // the imaginary unit
+  std::complex<float> I = std::complex<float>(0.0, 1.0); // the imaginary unit
 
   // generate some random nonuniform points (x) and complex strengths (c)...
-  vector<float> x(M);
-  vector<complex<float>> c(M);
+  std::vector<float> x(M);
+  std::vector<std::complex<float>> c(M);
   for (int j = 0; j < M; ++j) {
-    x[j] = PI * (2 * ((float)rand() / RAND_MAX) - 1); // uniform random in [-pi,pi)
-    c[j] = 2 * ((float)rand() / RAND_MAX) - 1 + I * (2 * ((float)rand() / RAND_MAX) - 1);
+    x[j] = PI * (2 * ((float)std::rand() / RAND_MAX) - 1); // uniform random in [-pi,pi)
+    c[j] = 2 * ((float)std::rand() / RAND_MAX) - 1 + I * (2 * ((float)std::rand() / RAND_MAX) - 1);
   }
   // allocate output array for the Fourier modes...
-  vector<complex<float>> F(N);
+  std::vector<std::complex<float>> F(N);
 
   // call the NUFFT (with iflag=+1): note pointers (not STL vecs) passed...
   int ier = finufftf1d1(M, &x[0], &c[0], +1, acc, N, &F[0], opts); // note "f"
 
   int k = 14251; // check the answer just for this mode...
   assert(k >= -(double)N / 2 && k < (double)N / 2);
-  complex<float> Ftest = complex<float>(0, 0);
-  for (int j = 0; j < M; ++j) Ftest += c[j] * exp(I * (float)k * x[j]);
+  std::complex<float> Ftest = std::complex<float>(0, 0);
+  for (int j = 0; j < M; ++j) Ftest += c[j] * std::exp(I * (float)k * x[j]);
   float Fmax = 0.0; // compute inf norm of F
   for (int m = 0; m < N; ++m) {
-    float aF = abs(F[m]);
+    float aF = std::abs(F[m]);
     if (aF > Fmax) Fmax = aF;
   }
   int kout  = k + N / 2; // index in output array for freq mode k
-  float err = abs(F[kout] - Ftest) / Fmax;
+  float err = std::abs(F[kout] - Ftest) / Fmax;
   printf("1D type-1 single-prec NUFFT done. ier=%d, rel err in F[%d] is %.3g\n", ier, k,
          err);
   return ier;
diff --git a/examples/simple2d1.cpp b/examples/simple2d1.cpp
index be7524e79..772d95c09 100644
--- a/examples/simple2d1.cpp
+++ b/examples/simple2d1.cpp
@@ -3,10 +3,11 @@
 #include <finufft.h>
 
 // also needed for this example...
+#include <cmath>
+#include <cstdlib>
 #include <iomanip>
 #include <iostream>
 #include <vector>
-using namespace std;
 
 static const double PI = 3.141592653589793238462643383279502884;
 
@@ -22,43 +23,43 @@ int main(int argc, char *argv[]) {
   double tol = 1e-6; // desired accuracy
   finufft_opts opts;
   finufft_default_opts(&opts);
-  complex<double> I(0.0, 1.0); // the imaginary unit
+  std::complex<double> I(0.0, 1.0); // the imaginary unit
 
   // generate random non-uniform points on (x,y) and complex strengths (c):
-  vector<double> x(M), y(M);
-  vector<complex<double>> c(M);
+  std::vector<double> x(M), y(M);
+  std::vector<std::complex<double>> c(M);
 
   for (int i = 0; i < M; i++) {
-    x[i] = PI * (2 * (double)rand() / RAND_MAX - 1); // uniform random in [-pi, pi)
-    y[i] = PI * (2 * (double)rand() / RAND_MAX - 1); // uniform random in [-pi, pi)
+    x[i] = PI * (2 * (double)std::rand() / RAND_MAX - 1); // uniform random in [-pi, pi)
+    y[i] = PI * (2 * (double)std::rand() / RAND_MAX - 1); // uniform random in [-pi, pi)
 
     // each component uniform random in [-1,1]
-    c[i] =
-        2 * ((double)rand() / RAND_MAX - 1) + I * (2 * ((double)rand() / RAND_MAX) - 1);
+    c[i] = 2 * ((double)std::rand() / RAND_MAX - 1) +
+           I * (2 * ((double)std::rand() / RAND_MAX) - 1);
   }
 
   // choose numbers of output Fourier coefficients in each dimension
-  int N1 = round(2.0 * sqrt(N));
-  int N2 = round(N / N1);
+  int N1 = (int)std::round(2.0 * std::sqrt(N));
+  int N2 = (int)std::round(N / N1);
 
   // output array for the Fourier modes
-  vector<complex<double>> F(N1 * N2);
+  std::vector<std::complex<double>> F(N1 * N2);
 
   // call the NUFFT (with iflag += 1): note passing in pointers...
   opts.upsampfac = 1.25;
   int ier        = finufft2d1(M, &x[0], &y[0], &c[0], 1, tol, N1, N2, &F[0], &opts);
 
-  int k1 = round(0.45 * N1); // check the answer for mode frequency (k1,k2)
-  int k2 = round(-0.35 * N2);
+  int k1 = (int)std::round(0.45 * N1); // check the answer for mode frequency (k1,k2)
+  int k2 = (int)std::round(-0.35 * N2);
 
-  complex<double> Ftest(0, 0);
+  std::complex<double> Ftest(0, 0);
   for (int j = 0; j < M; j++)
-    Ftest += c[j] * exp(I * ((double)k1 * x[j] + (double)k2 * y[j]));
+    Ftest += c[j] * std::exp(I * ((double)k1 * x[j] + (double)k2 * y[j]));
 
   // compute inf norm of F
   double Fmax = 0.0;
   for (int m = 0; m < N1 * N2; m++) {
-    double aF = abs(F[m]);
+    double aF = std::abs(F[m]);
     if (aF > Fmax) Fmax = aF;
   }
 
@@ -68,8 +69,8 @@ int main(int argc, char *argv[]) {
   int indexOut = k1out + k2out * (N1);
 
   // compute relative error
-  double err = abs(F[indexOut] - Ftest) / Fmax;
-  cout << "2D type-1 NUFFT done. ier=" << ier << ", err in F[" << indexOut
-       << "] rel to max(F) is " << setprecision(2) << err << endl;
+  double err = std::abs(F[indexOut] - Ftest) / Fmax;
+  std::cout << "2D type-1 NUFFT done. ier=" << ier << ", err in F[" << indexOut
+            << "] rel to max(F) is " << std::setprecision(2) << err << std::endl;
   return ier;
 }
diff --git a/examples/simulplans1d1.cpp b/examples/simulplans1d1.cpp
index 6f6cc4c9b..f5dbbba73 100644
--- a/examples/simulplans1d1.cpp
+++ b/examples/simulplans1d1.cpp
@@ -14,20 +14,22 @@
 #include <cassert>
 #include <complex>
 #include <cstdio>
-#include <stdlib.h>
+#include <cstdlib>
+#include <cstdint>
+#include <cmath>
 #include <vector>
-using namespace std;
 
 static const double PI = 3.141592653589793238462643383279502884;
 
-void strengths(vector<complex<double>> &c) { // fill random complex array
+void strengths(std::vector<std::complex<double>> &c) { // fill random complex array
+  const std::complex<double> I(0.0, 1.0);
   for (long unsigned int j = 0; j < c.size(); ++j)
-    c[j] =
-        2 * ((double)rand() / RAND_MAX) - 1 + 1i * (2 * ((double)rand() / RAND_MAX) - 1);
+    c[j] = 2 * ((double)std::rand() / RAND_MAX) - 1 +
+           I * (2 * ((double)std::rand() / RAND_MAX) - 1);
 }
 
-double chk1d1(int n, vector<double> &x, vector<complex<double>> &c,
-              vector<complex<double>> &F)
+double chk1d1(int n, std::vector<double> &x, std::vector<std::complex<double>> &c,
+              std::vector<std::complex<double>> &F)
 // return error in output array F, for n'th mode only, rel to ||F||_inf
 {
   int N = F.size();
@@ -35,16 +37,17 @@ double chk1d1(int n, vector<double> &x, vector<complex<double>> &c,
     printf("n out of bounds!\n");
     return NAN;
   }
-  complex<double> Ftest = complex<double>(0, 0);
+  std::complex<double> Ftest = std::complex<double>(0, 0);
+  const std::complex<double> I(0.0, 1.0);
   for (long unsigned int j = 0; j < x.size(); ++j)
-    Ftest += c[j] * exp(1i * (double)n * x[j]);
+    Ftest += c[j] * std::exp(I * (double)n * x[j]);
   int nout    = n + N / 2; // index in output array for freq mode n
   double Fmax = 0.0;       // compute inf norm of F
   for (int m = 0; m < N; ++m) {
-    double aF = abs(F[m]);
+    double aF = std::abs(F[m]);
     if (aF > Fmax) Fmax = aF;
   }
-  return abs(F[nout] - Ftest) / Fmax;
+  return std::abs(F[nout] - Ftest) / Fmax;
 }
 
 int main(int argc, char *argv[]) {
@@ -60,28 +63,28 @@ int main(int argc, char *argv[]) {
 
   finufft_plan planA, planB; // creates plan structs
   Ns[0] = NA;
-  finufft_makeplan(type, dim, Ns, +1, ntransf, tol, &planA, NULL);
+  finufft_makeplan(type, dim, Ns, +1, ntransf, tol, &planA, nullptr);
   Ns[0] = NB;
-  finufft_makeplan(type, dim, Ns, +1, ntransf, tol, &planB, NULL);
+  finufft_makeplan(type, dim, Ns, +1, ntransf, tol, &planB, nullptr);
 
   // generate some random nonuniform points
-  vector<double> xA(MA), xB(MB);
+  std::vector<double> xA(MA), xB(MB);
   for (int j = 0; j < MA; ++j)
-    xA[j] = PI * (2 * ((double)rand() / RAND_MAX) - 1); // uniform random in [-pi,pi)
+    xA[j] = PI * (2 * ((double)std::rand() / RAND_MAX) - 1); // uniform random in [-pi,pi)
   for (int j = 0; j < MB; ++j)
-    xB[j] = PI * (2 * ((double)rand() / RAND_MAX) - 1); // uniform random in [-pi,pi)
+    xB[j] = PI * (2 * ((double)std::rand() / RAND_MAX) - 1); // uniform random in [-pi,pi)
 
   // note FINUFFT doesn't use std::vector types, so we need to make a pointer...
-  finufft_setpts(planA, MA, &xA[0], NULL, NULL, 0, NULL, NULL, NULL);
-  finufft_setpts(planB, MB, &xB[0], NULL, NULL, 0, NULL, NULL, NULL);
+  finufft_setpts(planA, MA, &xA[0], nullptr, nullptr, 0, nullptr, nullptr, nullptr);
+  finufft_setpts(planB, MB, &xB[0], nullptr, nullptr, 0, nullptr, nullptr, nullptr);
 
   // generate some complex strengths
-  vector<complex<double>> cA(MA), cB(MB);
+  std::vector<std::complex<double>> cA(MA), cB(MB);
   strengths(cA);
   strengths(cB);
 
   // allocate output arrays for the Fourier modes...
-  vector<complex<double>> FA(NA), FB(NB);
+  std::vector<std::complex<double>> FA(NA), FB(NB);
   int ierA = finufft_execute(planA, &cA[0], &FA[0]);
   int ierB = finufft_execute(planB, &cB[0], &FB[0]);
 
diff --git a/examples/spreadinterponly1d.cpp b/examples/spreadinterponly1d.cpp
index 03b0cf670..8a32490d4 100644
--- a/examples/spreadinterponly1d.cpp
+++ b/examples/spreadinterponly1d.cpp
@@ -6,12 +6,11 @@
 #include <chrono>
 #include <complex>
 #include <cstdio>
-#include <stdlib.h>
+#include <cstdlib>
 #include <vector>
-using namespace std;
+#include <algorithm>
 
 static const double PI = 3.141592653589793238462643383279502884;
-using namespace std::chrono;
 
 int main(int argc, char *argv[])
 /* Example of double-prec spread/interp only tasks, with basic math tests.
@@ -39,10 +38,10 @@ int main(int argc, char *argv[])
   opts.upsampfac        = 2.0;  // pretend upsampling factor (really no upsampling)
   // opts.spread_kerevalmeth = 0;  // would be needed for any nonstd upsampfac
 
-  complex<double> I = complex<double>(0.0, 1.0); // the imaginary unit
-  vector<double> x(M);                           // input
-  vector<complex<double>> c(M);                  // input
-  vector<complex<double>> F(N);                  // output (spread to this array)
+  std::complex<double> I = std::complex<double>(0.0, 1.0); // the imaginary unit
+  std::vector<double> x(M);                                // input
+  std::vector<std::complex<double>> c(M);                  // input
+  std::vector<std::complex<double>> F(N);                  // output (spread to this array)
 
   // first spread M=1 single unit-strength at the origin, only to get its total mass...
   x[0]       = 0.0;
@@ -50,40 +49,40 @@ int main(int argc, char *argv[])
   int unused = 1;
   int ier = finufft1d1(1, x.data(), c.data(), unused, tol, N, F.data(), &opts); // warm-up
   if (ier > 1) return ier;
-  complex<double> kersum = 0.0;
+  std::complex<double> kersum = 0.0;
   for (auto Fk : F) kersum += Fk; // kernel mass
 
   // Now generate random nonuniform points (x) and complex strengths (c)...
   for (int j = 0; j < M; ++j) {
-    x[j] = PI * (2 * ((double)rand() / RAND_MAX) - 1); // uniform random in [-pi,pi)
-    c[j] =
-        2 * ((double)rand() / RAND_MAX) - 1 + I * (2 * ((double)rand() / RAND_MAX) - 1);
+    x[j] = PI * (2 * ((double)std::rand() / RAND_MAX) - 1); // uniform random in [-pi,pi)
+    c[j] = 2 * ((double)std::rand() / RAND_MAX) - 1 +
+           I * (2 * ((double)std::rand() / RAND_MAX) - 1);
   }
 
   opts.debug = 1;
-  auto t0    = steady_clock::now(); // now spread with all M pts... (dir=1)
+  auto t0    = std::chrono::steady_clock::now(); // now spread with all M pts... (dir=1)
   ier      = finufft1d1(M, x.data(), c.data(), unused, tol, N, F.data(), &opts); // do it
-  double t = (steady_clock::now() - t0) / 1.0s;
+  double t = std::chrono::duration<double>(std::chrono::steady_clock::now() - t0).count();
   if (ier > 1) return ier;
-  complex<double> csum = 0.0; // tot input strength
+  std::complex<double> csum = 0.0; // tot input strength
   for (auto cj : c) csum += cj;
-  complex<double> mass = 0.0; // tot output mass
+  std::complex<double> mass = 0.0; // tot output mass
   for (auto Fk : F) mass += Fk;
-  double relerr = abs(mass - kersum * csum) / abs(mass);
+  double relerr = std::abs(mass - kersum * csum) / std::abs(mass);
   printf("1D spread-only, double-prec, %.3g s (%.3g NU pt/sec), ier=%d, mass err %.3g\n",
          t, M / t, ier, relerr);
 
-  for (auto &Fk : F) Fk = complex<double>{1.0, 0.0}; // unit grid input
+  for (auto &Fk : F) Fk = std::complex<double>{1.0, 0.0}; // unit grid input
   opts.debug = 0;
-  t0         = steady_clock::now(); // now interp to all M pts...  (dir=2)
+  t0         = std::chrono::steady_clock::now(); // now interp to all M pts...  (dir=2)
   ier = finufft1d2(M, x.data(), c.data(), unused, tol, N, F.data(), &opts); // do it
-  t   = (steady_clock::now() - t0) / 1.0s;
+  t   = std::chrono::duration<double>(std::chrono::steady_clock::now() - t0).count();
   if (ier > 1) return ier;
   csum = 0.0; // tot output
   for (auto cj : c) csum += cj;
   double maxerr = 0.0;
-  for (auto cj : c) maxerr = max(maxerr, abs(cj - kersum));
+  for (auto cj : c) maxerr = std::max(maxerr, std::abs(cj - kersum));
   printf("1D interp-only, double-prec, %.3g s (%.3g NU pt/sec), ier=%d, max err %.3g\n",
-         t, M / t, ier, maxerr / abs(kersum));
+         t, M / t, ier, maxerr / std::abs(kersum));
   return 0;
 }
diff --git a/examples/threadsafe1d1.cpp b/examples/threadsafe1d1.cpp
index 9b8d7f79b..8dfc98c79 100644
--- a/examples/threadsafe1d1.cpp
+++ b/examples/threadsafe1d1.cpp
@@ -5,10 +5,9 @@
 #include <cassert>
 #include <complex>
 #include <cstdio>
+#include <cstdlib>
 #include <omp.h>
-#include <stdlib.h>
 #include <vector>
-using namespace std;
 
 static const double PI = 3.141592653589793238462643383279502884;
 
@@ -27,7 +26,7 @@ int main(int argc, char *argv[])
   double acc         = 1e-9;                     // desired accuracy
   finufft_opts *opts = new finufft_opts;         // opts is pointer to struct
   finufft_default_opts(opts);
-  complex<double> I = complex<double>(0.0, 1.0); // the imaginary unit
+  std::complex<double> I = std::complex<double>(0.0, 1.0); // the imaginary unit
 
   opts->nthreads = 1; // *crucial* so that each call single-thread (otherwise segfaults)
 
@@ -38,31 +37,31 @@ int main(int argc, char *argv[])
     // Note that these are local to the thread (if you have the *same* sets of
     // NU pts x for each thread, consider instead using one vectorized multithreaded
     // transform, which would be faster).
-    vector<double> x(M);
-    vector<complex<double>> c(M);
+    std::vector<double> x(M);
+    std::vector<std::complex<double>> c(M);
     for (int j = 0; j < M; ++j) {
-      x[j] = PI * (2 * ((double)rand() / RAND_MAX) - 1); // uniform random in [-pi,pi)
-      c[j] =
-          2 * ((double)rand() / RAND_MAX) - 1 + I * (2 * ((double)rand() / RAND_MAX) - 1);
+      x[j] = PI * (2 * ((double)std::rand() / RAND_MAX) - 1); // uniform random in [-pi,pi)
+      c[j] = 2 * ((double)std::rand() / RAND_MAX) - 1 +
+             I * (2 * ((double)std::rand() / RAND_MAX) - 1);
     }
 
     // allocate output array for the Fourier modes... local to the thread
-    vector<complex<double>> F(N);
+    std::vector<std::complex<double>> F(N);
 
     // call the NUFFT (with iflag=+1): note pointers (not STL vecs) passed...
     int ier = finufft1d1(M, &x[0], &c[0], +1, acc, N, &F[0], opts);
 
     int k = 42519; // check the answer just for this mode frequency...
     assert(k >= -(double)N / 2 && k < (double)N / 2);
-    complex<double> Ftest = complex<double>(0, 0);
-    for (int j = 0; j < M; ++j) Ftest += c[j] * exp(I * (double)k * x[j]);
+    std::complex<double> Ftest = std::complex<double>(0, 0);
+    for (int j = 0; j < M; ++j) Ftest += c[j] * std::exp(I * (double)k * x[j]);
     double Fmax = 0.0; // compute inf norm of F
     for (int m = 0; m < N; ++m) {
-      double aF = abs(F[m]);
+      double aF = std::abs(F[m]);
       if (aF > Fmax) Fmax = aF;
     }
     int kout   = k + N / 2; // index in output array for freq mode k
-    double err = abs(F[kout] - Ftest) / Fmax;
+    double err = std::abs(F[kout] - Ftest) / Fmax;
 
     printf("[thread %2d] 1D t-1 dbl-prec NUFFT done. ier=%d, rel err in F[%d]: %.3g\n",
            omp_get_thread_num(), ier, k, err);
diff --git a/examples/threadsafe2d2f.cpp b/examples/threadsafe2d2f.cpp
index 8c02604df..2e337cccb 100644
--- a/examples/threadsafe2d2f.cpp
+++ b/examples/threadsafe2d2f.cpp
@@ -21,7 +21,6 @@
 #include <iostream>
 #include <omp.h>
 #include <vector>
-using namespace std;
 
 int test_finufft(finufft_opts *opts)
 // self-contained small test that one single-prec FINUFFT2D2 has no error/crash
diff --git a/include/cufinufft/spreadinterp.h b/include/cufinufft/spreadinterp.h
index 97eab62f7..88b912f44 100644
--- a/include/cufinufft/spreadinterp.h
+++ b/include/cufinufft/spreadinterp.h
@@ -64,11 +64,12 @@ static inline T evaluate_kernel(T x, const finufft_spread_opts &opts)
    approximation to prolate spheroidal wavefunction (PSWF) of order 0.
    This is the "reference implementation", used by eg common/onedim_* 2/17/17 */
 {
-  if (abs(x) >= T(opts.ES_halfwidth))
+  if (std::abs(x) >= T(opts.ES_halfwidth))
     // if spreading/FT careful, shouldn't need this if, but causes no speed hit
     return 0.0;
   else
-    return exp((T)opts.ES_beta * (sqrt((T)1.0 - (T)opts.ES_c * x * x) - (T)1.0));
+    return std::exp(
+        (T)opts.ES_beta * (std::sqrt((T)1.0 - (T)opts.ES_c * x * x) - (T)1.0));
 }
 
 template<typename T>
@@ -84,8 +85,8 @@ static __forceinline__ __device__ T evaluate_kernel(T x, T es_c, T es_beta)
    This is the "reference implementation", used by eg common/onedim_*
     2/17/17 */
 {
-  return abs(x) < ns / T(2.0)
-             ? exp((T)es_beta * (sqrt((T)1.0 - (T)es_c * x * x) - (T)1.0))
+  return std::abs(x) < ns / T(2.0)
+             ? std::exp((T)es_beta * (std::sqrt((T)1.0 - (T)es_c * x * x) - (T)1.0))
              : 0.0;
 }
 
@@ -113,7 +114,7 @@ template<typename T, int w>
 static __inline__ __device__ void eval_kernel_vec(T *ker, const T x, const T es_c,
                                                   const T es_beta) {
   for (int i = 0; i < w; i++) {
-    ker[i] = evaluate_kernel<T, w>(abs(x + i), es_c, es_beta);
+    ker[i] = evaluate_kernel<T, w>(std::abs(x + i), es_c, es_beta);
   }
 }
 
diff --git a/perftest/big2d2f.cpp b/perftest/big2d2f.cpp
index 1a87067d2..c2645e417 100644
--- a/perftest/big2d2f.cpp
+++ b/perftest/big2d2f.cpp
@@ -14,7 +14,6 @@
 #include <iostream>
 #include <omp.h>
 #include <vector>
-using namespace std;
 
 int test_finufft(finufft_opts *opts) {
   size_t nj = 129 * 129 * 2;
diff --git a/perftest/guru_timing_test.cpp b/perftest/guru_timing_test.cpp
index a20b4112e..4e6df5a36 100644
--- a/perftest/guru_timing_test.cpp
+++ b/perftest/guru_timing_test.cpp
@@ -1,5 +1,6 @@
 #include "finufft/finufft_utils.hpp"
 #include <finufft/test_defs.h>
+#include <memory>
 
 // for sleep call
 #if defined(WIN32) || defined(_WIN32) || defined(__WIN32) && !defined(__CYGWIN__)
@@ -8,7 +9,6 @@ void sleep(unsigned long seconds) { Sleep(seconds * 1000); }
 #else
 #include <unistd.h>
 #endif
-using namespace std;
 using namespace finufft;
 using namespace finufft::utils;
 
@@ -59,91 +59,108 @@ int main(int argc, char *argv[])
 
   // Collect command line arguments ------------------------------------------
   if (argc < 8 || argc > 14) {
-    fprintf(
+    std::fprintf(
         stderr,
         "Usage: guru_timing_test ntransf type ndim N1 N2 N3 Nsrc [tol [debug "
         "[spread_thread [maxbatchsize [spread_sort "
         "[upsampfac]]]]]]\n\teg:\tguru_timing_test 100 1 2 1e2 1e2 0 1e6 1e-3 1 0 0 2\n");
     return 1;
   }
-  sscanf(argv[1], "%d", &ntransf);
-  sscanf(argv[2], "%d", &type);
-  sscanf(argv[3], "%d", &ndim);
-  sscanf(argv[4], "%lf", &w);
+  std::sscanf(argv[1], "%d", &ntransf);
+  std::sscanf(argv[2], "%d", &type);
+  std::sscanf(argv[3], "%d", &ndim);
+  std::sscanf(argv[4], "%lf", &w);
   N1 = (BIGINT)w;
-  sscanf(argv[5], "%lf", &w);
+  std::sscanf(argv[5], "%lf", &w);
   N2 = (BIGINT)w;
-  sscanf(argv[6], "%lf", &w);
+  std::sscanf(argv[6], "%lf", &w);
   N3 = (BIGINT)w;
-  sscanf(argv[7], "%lf", &w);
+  std::sscanf(argv[7], "%lf", &w);
   M = (BIGINT)w;
-  if (argc > 8) sscanf(argv[8], "%lf", &tol);
-  if (argc > 9) sscanf(argv[9], "%d", &opts.debug);
+  if (argc > 8) std::sscanf(argv[8], "%lf", &tol);
+  if (argc > 9) std::sscanf(argv[9], "%d", &opts.debug);
   opts.spread_debug = (opts.debug > 1) ? 1 : 0; // see output from spreader
-  if (argc > 10) sscanf(argv[10], "%d", &opts.spread_thread);
-  if (argc > 11) sscanf(argv[11], "%d", &opts.maxbatchsize);
-  if (argc > 12) sscanf(argv[12], "%d", &opts.spread_sort);
+  if (argc > 10) std::sscanf(argv[10], "%d", &opts.spread_thread);
+  if (argc > 11) std::sscanf(argv[11], "%d", &opts.maxbatchsize);
+  if (argc > 12) std::sscanf(argv[12], "%d", &opts.spread_sort);
   if (argc > 13) {
-    sscanf(argv[13], "%lf", &w);
+    std::sscanf(argv[13], "%lf", &w);
     opts.upsampfac = (FLT)w;
   }
 
   // Allocate and initialize input -------------------------------------------
-  cout << scientific << setprecision(15);
+  std::cout << std::scientific << std::setprecision(15);
   N2       = (N2 == 0) ? 1 : N2;
   N3       = (N3 == 0) ? 1 : N3;
   BIGINT N = N1 * N2 * N3;
 
-  FLT *s = NULL;
-  FLT *t = NULL;
-  FLT *u = NULL;
+  std::unique_ptr<FLT[]> s;
+  std::unique_ptr<FLT[]> t;
+  std::unique_ptr<FLT[]> u;
   if (type == 3) { // make target freq NU pts for type 3 (N of them)...
-    s      = (FLT *)malloc(sizeof(FLT) * N); // targ freqs (1-cmpt)
-    FLT S1 = (FLT)N1 / 2;
+    s = std::make_unique<FLT[]>(N); // targ freqs (1-cmpt)
+    FLT *s_ptr = s.get();
+    FLT S1     = (FLT)N1 / 2;
 #pragma omp parallel
     {
       unsigned int se = MY_OMP_GET_THREAD_NUM(); // needed for parallel random #s
 #pragma omp for schedule(dynamic, TEST_RANDCHUNK)
       for (BIGINT k = 0; k < N; ++k) {
-        s[k] = S1 * (1.7 + randm11r(&se)); // note the offset, to test type 3.
+        s_ptr[k] = S1 * (1.7 + randm11r(&se)); // note the offset, to test type 3.
       }
-      if (ndim > 1) {
-        t      = (FLT *)malloc(sizeof(FLT) * N); // targ freqs (2-cmpt)
-        FLT S2 = (FLT)N2 / 2;
+    }
+    if (ndim > 1) {
+      t = std::make_unique<FLT[]>(N); // targ freqs (2-cmpt)
+      FLT *t_ptr = t.get();
+      FLT S2     = (FLT)N2 / 2;
+#pragma omp parallel
+      {
+        unsigned int se = MY_OMP_GET_THREAD_NUM(); // needed for parallel random #s
 #pragma omp for schedule(dynamic, TEST_RANDCHUNK)
         for (BIGINT k = 0; k < N; ++k) {
-          t[k] = S2 * (-0.5 + randm11r(&se));
+          t_ptr[k] = S2 * (-0.5 + randm11r(&se));
         }
       }
-      if (ndim > 2) {
-        u      = (FLT *)malloc(sizeof(FLT) * N); // targ freqs (3-cmpt)
-        FLT S3 = (FLT)N3 / 2;
+    }
+    if (ndim > 2) {
+      u = std::make_unique<FLT[]>(N); // targ freqs (3-cmpt)
+      FLT *u_ptr = u.get();
+      FLT S3     = (FLT)N3 / 2;
+#pragma omp parallel
+      {
+        unsigned int se = MY_OMP_GET_THREAD_NUM(); // needed for parallel random #s
 #pragma omp for schedule(dynamic, TEST_RANDCHUNK)
         for (BIGINT k = 0; k < N; ++k) {
-          u[k] = S3 * (0.9 + randm11r(&se));
+          u_ptr[k] = S3 * (0.9 + randm11r(&se));
         }
       }
     }
   }
 
-  CPX *c = (CPX *)malloc(sizeof(CPX) * M * ntransf);             // strengths
-  CPX *F = (CPX *)malloc(sizeof(CPX) * N * ntransf);             // mode ampls
-
-  FLT *x = (FLT *)malloc(sizeof(FLT) * M), *y = NULL, *z = NULL; // NU pts x coords
-  if (ndim > 1) y = (FLT *)malloc(sizeof(FLT) * M);              // NU pts y coords
-  if (ndim > 2) z = (FLT *)malloc(sizeof(FLT) * M);              // NU pts z coords
+  auto c = std::make_unique<CPX[]>(M * ntransf); // strengths
+  auto F = std::make_unique<CPX[]>(N * ntransf); // mode ampls
+
+  auto x = std::make_unique<FLT[]>(M);
+  std::unique_ptr<FLT[]> y;
+  std::unique_ptr<FLT[]> z; // NU pts coords
+  if (ndim > 1) y = std::make_unique<FLT[]>(M); // NU pts y coords
+  if (ndim > 2) z = std::make_unique<FLT[]>(M); // NU pts z coords
+  FLT *x_ptr = x.get();
+  FLT *y_ptr = y.get();
+  FLT *z_ptr = z.get();
+  CPX *c_ptr = c.get();
 #pragma omp parallel
   {
     unsigned int se = MY_OMP_GET_THREAD_NUM(); // needed for parallel random #s
 #pragma omp for schedule(dynamic, TEST_RANDCHUNK)
     for (BIGINT j = 0; j < M; ++j) {
-      x[j] = PI * randm11r(&se);
-      if (y) y[j] = PI * randm11r(&se);
-      if (z) z[j] = PI * randm11r(&se);
+      x_ptr[j] = PI * randm11r(&se);
+      if (y_ptr) y_ptr[j] = PI * randm11r(&se);
+      if (z_ptr) z_ptr[j] = PI * randm11r(&se);
     }
 #pragma omp for schedule(dynamic, TEST_RANDCHUNK)
     for (BIGINT i = 0; i < ntransf * M; i++) // random strengths
-      c[i] = crandm11r(&se);
+      c_ptr[i] = crandm11r(&se);
   }
 
   // Andrea found the following are needed to get reliable independent timings:
@@ -153,8 +170,11 @@ int main(int argc, char *argv[])
   // std::this_thread::sleep_for(std::chrono::seconds(1));
   sleep(tsleep);
 
-  printf("FINUFFT %dd%d use guru interface to do %d calls together:-------------------\n",
-         ndim, type, ntransf);
+  std::printf(
+      "FINUFFT %dd%d use guru interface to do %d calls together:-------------------\n",
+      ndim,
+      type,
+      ntransf);
   FINUFFT_PLAN plan;                // instantiate a finufft_plan
   CNTime timer;
   timer.start();                    // Guru Step 1
@@ -163,45 +183,63 @@ int main(int argc, char *argv[])
   // (NB: the opts struct can no longer be modified with effect!)
   double plan_t = timer.elapsedsec();
   if (ier > 1) {
-    printf("error (ier=%d)!\n", ier);
+    std::printf("error (ier=%d)!\n", ier);
     return ier;
   } else {
     if (type != 3)
-      printf("\tplan, for %lld modes: \t\t%.3g s\n", (long long)N, plan_t);
+      std::printf("\tplan, for %lld modes: \t\t%.3g s\n", (long long)N, plan_t);
     else
-      printf("\tplan:\t\t\t\t\t%.3g s\n", plan_t);
+      std::printf("\tplan:\t\t\t\t\t%.3g s\n", plan_t);
   }
 
   timer.restart();                                              // Guru Step 2
-  ier           = FINUFFT_SETPTS(plan, M, x, y, z, N, s, t, u); //(t1,2: N,s,t,u ignored)
+  ier = FINUFFT_SETPTS(plan,
+                       M,
+                       x.get(),
+                       y.get(),
+                       z.get(),
+                       N,
+                       s.get(),
+                       t.get(),
+                       u.get()); //(t1,2: N,s,t,u ignored)
   double sort_t = timer.elapsedsec();
   if (ier) {
-    printf("error (ier=%d)!\n", ier);
+    std::printf("error (ier=%d)!\n", ier);
     return ier;
   } else {
     if (type != 3)
-      printf("\tsetpts for %lld NU pts: \t\t%.3g s\n", (long long)M, sort_t);
+      std::printf("\tsetpts for %lld NU pts: \t\t%.3g s\n", (long long)M, sort_t);
     else
-      printf("\tsetpts for %lld + %lld NU pts: \t%.3g s\n", (long long)M, (long long)N,
-             sort_t);
+      std::printf("\tsetpts for %lld + %lld NU pts: \t%.3g s\n",
+                  (long long)M,
+                  (long long)N,
+                  sort_t);
   }
 
   timer.restart(); // Guru Step 3
-  ier           = FINUFFT_EXECUTE(plan, c, F);
+  ier = FINUFFT_EXECUTE(plan, c.get(), F.get());
   double exec_t = timer.elapsedsec();
   if (ier) {
-    printf("error (ier=%d)!\n", ier);
+    std::printf("error (ier=%d)!\n", ier);
     return ier;
   } else
-    printf("\texec \t\t\t\t\t%.3g s\n", exec_t);
+    std::printf("\texec \t\t\t\t\t%.3g s\n", exec_t);
 
   double totalTime = plan_t + sort_t + exec_t;
   if (type != 3)
-    printf("ntr=%d: %lld NU pts to %lld modes in %.3g s \t%.3g NU pts/s\n", ntransf,
-           (long long)M, (long long)N, totalTime, ntransf * M / totalTime);
+    std::printf("ntr=%d: %lld NU pts to %lld modes in %.3g s \t%.3g NU pts/s\n",
+                ntransf,
+                (long long)M,
+                (long long)N,
+                totalTime,
+                ntransf * M / totalTime);
   else
-    printf("ntr=%d: %lld NU pts to %lld NU pts in %.3g s \t%.3g tot NU pts/s\n", ntransf,
-           (long long)M, (long long)N, totalTime, ntransf * (N + M) / totalTime);
+    std::printf("ntr=%d: %lld NU pts to %lld NU pts in %.3g s \t%.3g tot NU pts/s\n",
+                ntransf,
+                (long long)M,
+                (long long)N,
+                totalTime,
+                ntransf * (N + M) / totalTime);
 
   // Comparing timing results with repeated calls to corresponding finufft function...
 
@@ -216,36 +254,40 @@ int main(int argc, char *argv[])
   // std::this_thread::sleep_for(std::chrono::seconds(1)); if c++11 is allowed
   sleep(tsleep); // sleep for one second using linux sleep call
 
-  printf(
+  std::printf(
       "Compare speed of repeated calls to simple interface:------------------------\n");
   // this used to actually call Alex's old (v1.1) src/finufft?d.cpp routines.
   // Since we don't want to ship those, we now call the simple interfaces.
 
-  double simpleTime = many_simple_calls(c, F, x, y, z, plan);
-  if (isnan(simpleTime)) return 1;
+  double simpleTime =
+      many_simple_calls(c.get(), F.get(), x.get(), y.get(), z.get(), plan);
+  if (std::isnan(simpleTime)) return 1;
 
   if (type != 3)
-    printf("%d of:\t%lld NU pts to %lld modes in %.3g s   \t%.3g NU pts/s\n", ntransf,
-           (long long)M, (long long)N, simpleTime, ntransf * M / simpleTime);
+    std::printf("%d of:\t%lld NU pts to %lld modes in %.3g s   \t%.3g NU pts/s\n",
+                ntransf,
+                (long long)M,
+                (long long)N,
+                simpleTime,
+                ntransf * M / simpleTime);
   else
-    printf("%d of:\t%lld NU pts to %lld NU pts in %.3g s  \t%.3g tot NU pts/s\n", ntransf,
-           (long long)M, (long long)N, simpleTime, ntransf * (M + N) / simpleTime);
-  printf("\tspeedup \t T_finufft%dd%d_simple / T_finufft%dd%d = %.3g\n", ndim, type, ndim,
-         type, simpleTime / totalTime);
+    std::printf("%d of:\t%lld NU pts to %lld NU pts in %.3g s  \t%.3g tot NU pts/s\n",
+                ntransf,
+                (long long)M,
+                (long long)N,
+                simpleTime,
+                ntransf * (M + N) / simpleTime);
+  std::printf("\tspeedup \t T_finufft%dd%d_simple / T_finufft%dd%d = %.3g\n",
+              ndim,
+              type,
+              ndim,
+              type,
+              simpleTime / totalTime);
 
   FINUFFT_DESTROY(plan); // Guru Step 4
   // (must be done *after* many_simple_calls, which sneaks a look at the plan!)
   // however, segfaults, maybe because plan->opts.debug changed?
 
-  //---------------------------- Free Memory (no need to test if NULL)
-  free(F);
-  free(c);
-  free(x);
-  free(y);
-  free(z);
-  free(s);
-  free(t);
-  free(u);
   return 0;
 }
 
@@ -411,8 +453,8 @@ double many_simple_calls(CPX *c, CPX *F, FLT *x, FLT *y, FLT *z, FINUFFT_PLAN pl
     }
 
     temp = finufftFunnel(cStart, fStart, x, y, z, plan);
-    if (isnan(temp)) {
-      fprintf(stderr, "[%s] Funnel call to finufft failed!\n", __func__);
+    if (std::isnan(temp)) {
+      std::fprintf(stderr, "[%s] Funnel call to finufft failed!\n", __func__);
       return NAN;
     } else
       time += temp;
diff --git a/perftest/manysmallprobs.cpp b/perftest/manysmallprobs.cpp
index 1b8ebfdb3..257e1e48d 100644
--- a/perftest/manysmallprobs.cpp
+++ b/perftest/manysmallprobs.cpp
@@ -1,4 +1,5 @@
 #include <complex>
+#include <vector>
 
 // public header
 #include "finufft.h"
@@ -7,8 +8,6 @@
 #include "finufft/test_defs.h"
 using namespace finufft::utils;
 
-using namespace std;
-
 int main(int argc, char *argv[])
 /* What is small-problem cost of FINUFFT library from C++, using plain
    arrays of C++ complex numbers?  Barnett 10/31/17.
@@ -33,36 +32,41 @@ int main(int argc, char *argv[])
   int reps   = 2e4;                              // how many repetitions
   double acc = 1e-6;                             // desired accuracy
 
-  complex<double> I = complex<double>(0.0, 1.0); // the imaginary unit
+  std::complex<double> I = std::complex<double>(0.0, 1.0); // the imaginary unit
   int ier;
 
   // generate some random nonuniform points (x) and complex strengths (c):
-  double *x          = (double *)malloc(sizeof(double) * M);
-  complex<double> *c = (complex<double> *)malloc(sizeof(complex<double>) * M);
+  std::vector<double> x(M);
+  std::vector<std::complex<double>> c(M);
   for (int j = 0; j < M; ++j) {
-    x[j] = PI * (2 * ((double)rand() / RAND_MAX) - 1); // uniform random in [-pi,pi]
-    c[j] =
-        2 * ((double)rand() / RAND_MAX) - 1 + I * (2 * ((double)rand() / RAND_MAX) - 1);
+    x[j] = PI * (2 * (static_cast<double>(std::rand()) / RAND_MAX) - 1); // uniform random in [-pi,pi]
+    c[j] = 2 * (static_cast<double>(std::rand()) / RAND_MAX) - 1 +
+           I * (2 * (static_cast<double>(std::rand()) / RAND_MAX) - 1);
   }
   // allocate output array for the Fourier modes:
-  complex<double> *F = (complex<double> *)malloc(sizeof(complex<double>) * N);
+  std::vector<std::complex<double>> F(N);
 
-  printf("repeatedly calling the simple interface: --------------------- \n");
+  std::printf("repeatedly calling the simple interface: --------------------- \n");
   CNTime timer;
   timer.start();
   for (int r = 0; r < reps; ++r) { // call the NUFFT (with iflag=+1):
     // printf("rep %d\n",r);
-    x[0] = PI * (-1.0 + 2 * (double)r / (double)reps); // one source jiggles around
-    c[0] = (1.0 + I) * (double)r / (double)reps;       // one coeff also jiggles
-    ier  = finufft1d1(M, x, c, +1, acc, N, F, NULL);
+    x[0] = PI * (-1.0 + 2 * static_cast<double>(r) / static_cast<double>(reps)); // one source jiggles around
+    c[0] = (1.0 + I) * static_cast<double>(r) / static_cast<double>(reps);       // one coeff also jiggles
+    ier = finufft1d1(M, x.data(), c.data(), +1, acc, N, F.data(), nullptr);
   }
   // (note this can't use the many-vectors interface since the NU change)
-  complex<double> y = F[0]; // actually use the data so not optimized away
-  printf(
+  std::complex<double> y = F[0]; // actually use the data so not optimized away
+  std::printf(
       "%d reps of 1d1 done in %.3g s,\t%.3g NU pts/s\t(last ier=%d)\nF[0]=%.6g + %.6gi\n",
-      reps, timer.elapsedsec(), reps * M / timer.elapsedsec(), ier, real(y), imag(y));
+      reps,
+      timer.elapsedsec(),
+      reps * M / timer.elapsedsec(),
+      ier,
+      std::real(y),
+      std::imag(y));
 
-  printf("repeatedly executing via the guru interface: -------------------\n");
+  std::printf("repeatedly executing via the guru interface: -------------------\n");
   timer.restart();
   finufft_plan plan;
   finufft_opts opts;
@@ -72,19 +76,21 @@ int main(int argc, char *argv[])
   int ntransf  = 1;                // since we do one at a time (neq reps)
   finufft_makeplan(1, 1, Ns, +1, ntransf, acc, &plan, &opts);
   for (int r = 0; r < reps; ++r) { // set the pts and execute
-    x[0] = PI * (-1.0 + 2 * (double)r / (double)reps); // one source jiggles around
+    x[0] = PI * (-1.0 + 2 * static_cast<double>(r) / static_cast<double>(reps)); // one source jiggles around
     // (of course if most sources *were* in fact fixed, use ZGEMM for them!)
-    finufft_setpts(plan, M, x, NULL, NULL, 0, NULL, NULL, NULL);
-    c[0] = (1.0 + I) * (double)r / (double)reps; // one coeff also jiggles
-    ier  = finufft_execute(plan, c, F);
+    finufft_setpts(plan, M, x.data(), nullptr, nullptr, 0, nullptr, nullptr, nullptr);
+    c[0] = (1.0 + I) * static_cast<double>(r) / static_cast<double>(reps); // one coeff also jiggles
+    ier = finufft_execute(plan, c.data(), F.data());
   }
   finufft_destroy(plan);
   y = F[0];
-  printf(
+  std::printf(
       "%d reps of 1d1 done in %.3g s,\t%.3g NU pts/s\t(last ier=%d)\nF[0]=%.6g + %.6gi\n",
-      reps, timer.elapsedsec(), reps * M / timer.elapsedsec(), ier, real(y), imag(y));
-  free(x);
-  free(c);
-  free(F);
+      reps,
+      timer.elapsedsec(),
+      reps * M / timer.elapsedsec(),
+      ier,
+      std::real(y),
+      std::imag(y));
   return ier;
 }
diff --git a/perftest/spreadtestnd.cpp b/perftest/spreadtestnd.cpp
index 64aac05f5..bee793d62 100644
--- a/perftest/spreadtestnd.cpp
+++ b/perftest/spreadtestnd.cpp
@@ -2,25 +2,20 @@
 #include <finufft/spreadinterp.h>
 #include <finufft/test_defs.h>
 
-#include <math.h>
-#include <stdio.h>
-#include <stdlib.h>
+#include <algorithm>
 #include <vector>
-
-#include "finufft/finufft_utils.hpp"
 using namespace finufft::spreadinterp;
-using namespace std;
 using namespace finufft::utils;
 
 void usage() {
-  printf("usage: spreadtestnd dims [M N [tol [sort [flags [debug [kerpad [kerevalmeth "
-         "[upsampfac]]]]]]]]\n\twhere dims=1,2 or 3\n\tM=# nonuniform pts\n\tN=# uniform "
-         "pts\n\ttol=requested accuracy\n\tsort=0 (don't sort NU pts), 1 (do), or 2 "
-         "(maybe sort; default)\n\tflags: expert timing flags, 0 is default (see "
-         "spreadinterp.h)\n\tdebug=0 (less text out), 1 (more), 2 (lots)\n\tkerpad=0 (no "
-         "pad to mult of 4), 1 (do, for kerevalmeth=0 only)\n\tkerevalmeth=0 (direct), 1 "
-         "(Horner ppval)\n\tupsampfac>1; 2 or 1.25 for Horner\n\nexample: ./spreadtestnd "
-         "1 1e6 1e6 1e-6 2 0 1\n");
+  std::printf("usage: spreadtestnd dims [M N [tol [sort [flags [debug [kerpad [kerevalmeth "
+              "[upsampfac]]]]]]]]\n\twhere dims=1,2 or 3\n\tM=# nonuniform pts\n\tN=# uniform "
+              "pts\n\ttol=requested accuracy\n\tsort=0 (don't sort NU pts), 1 (do), or 2 "
+              "(maybe sort; default)\n\tflags: expert timing flags, 0 is default (see "
+              "spreadinterp.h)\n\tdebug=0 (less text out), 1 (more), 2 (lots)\n\tkerpad=0 (no "
+              "pad to mult of 4), 1 (do, for kerevalmeth=0 only)\n\tkerevalmeth=0 (direct), 1 "
+              "(Horner ppval)\n\tupsampfac>1; 2 or 1.25 for Horner\n\nexample: ./spreadtestnd "
+              "1 1e6 1e6 1e-6 2 0 1\n");
 }
 
 int main(int argc, char *argv[])
@@ -57,75 +52,75 @@ int main(int argc, char *argv[])
     usage();
     return (argc > 1);
   }
-  sscanf(argv[1], "%d", &d);
+  std::sscanf(argv[1], "%d", &d);
   if (d < 1 || d > 3) {
-    printf("d must be 1, 2 or 3!\n");
+    std::printf("d must be 1, 2 or 3!\n");
     usage();
     return 1;
   }
   if (argc > 2) {
-    sscanf(argv[2], "%lf", &w);
+    std::sscanf(argv[2], "%lf", &w);
     M = (BIGINT)w; // to read "1e6" right!
     if (M < 1) {
-      printf("M (# NU pts) must be positive!\n");
+      std::printf("M (# NU pts) must be positive!\n");
       usage();
       return 1;
     }
-    sscanf(argv[3], "%lf", &w);
+    std::sscanf(argv[3], "%lf", &w);
     roughNg = (BIGINT)w;
     if (roughNg < 1) {
-      printf("N (# U pts) must be positive!\n");
+      std::printf("N (# U pts) must be positive!\n");
       usage();
       return 1;
     }
   }
-  if (argc > 4) sscanf(argv[4], "%lf", &tol);
+  if (argc > 4) std::sscanf(argv[4], "%lf", &tol);
   if (argc > 5) {
-    sscanf(argv[5], "%d", &sort);
+    std::sscanf(argv[5], "%d", &sort);
     if ((sort != 0) && (sort != 1) && (sort != 2)) {
-      printf("sort must be 0, 1 or 2!\n");
+      std::printf("sort must be 0, 1 or 2!\n");
       usage();
       return 1;
     }
   }
-  if (argc > 6) sscanf(argv[6], "%d", &flags);
+  if (argc > 6) std::sscanf(argv[6], "%d", &flags);
   if (argc > 7) {
-    sscanf(argv[7], "%d", &debug);
+    std::sscanf(argv[7], "%d", &debug);
     if ((debug < 0) || (debug > 2)) {
-      printf("debug must be 0, 1 or 2!\n");
+      std::printf("debug must be 0, 1 or 2!\n");
       usage();
       return 1;
     }
   }
   if (argc > 8) {
-    sscanf(argv[8], "%d", &kerpad);
+    std::sscanf(argv[8], "%d", &kerpad);
     if ((kerpad < 0) || (kerpad > 1)) {
-      printf("kerpad must be 0 or 1!\n");
+      std::printf("kerpad must be 0 or 1!\n");
       usage();
       return 1;
     }
   }
   if (argc > 9) {
-    sscanf(argv[9], "%d", &kerevalmeth);
+    std::sscanf(argv[9], "%d", &kerevalmeth);
     if ((kerevalmeth < 0) || (kerevalmeth > 1)) {
-      printf("kerevalmeth must be 0 or 1!\n");
+      std::printf("kerevalmeth must be 0 or 1!\n");
       usage();
       return 1;
     }
   }
   if (argc > 10) {
-    sscanf(argv[10], "%lf", &w);
+    std::sscanf(argv[10], "%lf", &w);
     upsampfac = (FLT)w;
     if (upsampfac <= 1.0) {
-      printf("upsampfac must be >1.0!\n");
+      std::printf("upsampfac must be >1.0!\n");
       usage();
       return 1;
     }
   }
 
   int dodir1 = true;                                   // control if dir=1 tested at all
-  BIGINT N   = (BIGINT)round(pow(roughNg, 1.0 / d));   // Fourier grid size per dim
-  BIGINT Ng  = (BIGINT)pow(N, d);                      // actual total grid points
+  BIGINT N   = (BIGINT)std::round(std::pow(roughNg, 1.0 / d)); // Fourier grid size per dim
+  BIGINT Ng  = (BIGINT)std::pow(N, d);                          // actual total grid points
   BIGINT N2 = (d >= 2) ? N : 1, N3 = (d == 3) ? N : 1; // the y and z grid sizes
   std::vector<FLT> kx(M), ky(1), kz(1), d_nonuniform(2 * M); // NU, Re & Im
   if (d > 1) ky.resize(M);                                   // only alloc needed coords
@@ -135,7 +130,7 @@ int main(int argc, char *argv[])
   finufft_spread_opts opts;
   int ier_set = setup_spreader(opts, (FLT)tol, upsampfac, kerevalmeth, debug, 1, d, 1);
   if (ier_set > 1) { // exit gracefully if can't set up.
-    printf("error when setting up spreader (ier_set=%d)!\n", ier_set);
+    std::printf("error when setting up spreader (ier_set=%d)!\n", ier_set);
     return ier_set;
   }
   opts.debug        = debug; // print more diagnostics?
@@ -175,7 +170,7 @@ int main(int argc, char *argv[])
   }
 
   // now do the large-scale test w/ random sources..
-  printf("making random data...\n");
+  std::printf("making random data...\n");
   FLT strre = 0.0, strim = 0.0; // also sum the strengths
 #pragma omp parallel
   {
@@ -195,12 +190,12 @@ int main(int argc, char *argv[])
   CNTime timer;
   double t;
   if (dodir1) { // test direction 1 (NU -> U spreading) ......................
-    printf("spreadinterp %dD, %.3g U pts, dir=%d, tol=%.3g: nspread=%d\n",
-           d,
-           (double)Ng,
-           opts.spread_direction,
-           tol,
-           opts.nspread);
+    std::printf("spreadinterp %dD, %.3g U pts, dir=%d, tol=%.3g: nspread=%d\n",
+                d,
+                static_cast<double>(Ng),
+                opts.spread_direction,
+                tol,
+                opts.nspread);
     timer.start();
     ier = spreadinterp(N,
                        N2,
@@ -214,14 +209,14 @@ int main(int argc, char *argv[])
                        opts);
     t   = timer.elapsedsec();
     if (ier != 0) {
-      printf("error (ier=%d)!\n", ier);
+      std::printf("error (ier=%d)!\n", ier);
       return ier;
     } else
-      printf("    %.3g NU pts in %.3g s \t%.3g pts/s \t%.3g spread pts/s\n",
-             (double)M,
-             t,
-             M / t,
-             pow(opts.nspread, d) * M / t);
+      std::printf("    %.3g NU pts in %.3g s \t%.3g pts/s \t%.3g spread pts/s\n",
+                  static_cast<double>(M),
+                  t,
+                  M / t,
+                  std::pow(opts.nspread, d) * M / t);
 
     FLT sumre = 0.0, sumim = 0.0; // check spreading accuracy, wrapping
 #pragma omp parallel for reduction(+ : sumre, sumim)
@@ -231,15 +226,15 @@ int main(int argc, char *argv[])
     }
     FLT pre    = kersumre * strre - kersumim * strim; // pred ans, complex mult
     FLT pim    = kersumim * strre + kersumre * strim;
-    FLT maxerr = std::max(fabs(sumre - pre), fabs(sumim - pim));
-    FLT ansmod = sqrt(sumre * sumre + sumim * sumim);
-    printf("    rel err in total over grid:      %.3g\n", maxerr / ansmod);
+    FLT maxerr = std::max(std::fabs(sumre - pre), std::fabs(sumim - pim));
+    FLT ansmod = std::sqrt(sumre * sumre + sumim * sumim);
+    std::printf("    rel err in total over grid:      %.3g\n", maxerr / ansmod);
     // note this is weaker than below dir=2 test, but is good indicator that
     // periodic wrapping is correct
   }
 
   // test direction 2 (U -> NU interpolation) ..............................
-  printf("making more random NU pts...\n");
+  std::printf("making more random NU pts...\n");
   for (BIGINT i = 0; i < Ng; ++i) { // unit grid data
     d_uniform[2 * i]     = 1.0;
     d_uniform[2 * i + 1] = 0.0;
@@ -257,12 +252,12 @@ int main(int argc, char *argv[])
   }
 
   opts.spread_direction = 2;
-  printf("spreadinterp %dD, %.3g U pts, dir=%d, tol=%.3g: nspread=%d\n",
-         d,
-         (double)Ng,
-         opts.spread_direction,
-         tol,
-         opts.nspread);
+  std::printf("spreadinterp %dD, %.3g U pts, dir=%d, tol=%.3g: nspread=%d\n",
+              d,
+              static_cast<double>(Ng),
+              opts.spread_direction,
+              tol,
+              opts.nspread);
   timer.restart();
   ier = spreadinterp(N,
                      N2,
@@ -276,24 +271,24 @@ int main(int argc, char *argv[])
                      opts);
   t   = timer.elapsedsec();
   if (ier != 0) {
-    printf("error (ier=%d)!\n", ier);
+    std::printf("error (ier=%d)!\n", ier);
     return 1;
   } else
-    printf("    %.3g NU pts in %.3g s \t%.3g pts/s \t%.3g spread pts/s\n",
-           (double)M,
-           t,
-           M / t,
-           pow(opts.nspread, d) * M / t);
+    std::printf("    %.3g NU pts in %.3g s \t%.3g pts/s \t%.3g spread pts/s\n",
+                static_cast<double>(M),
+                t,
+                M / t,
+                std::pow(opts.nspread, d) * M / t);
 
   // math test is worst-case error from pred value (kersum) on interp pts:
   maxerr = 0.0;
   for (BIGINT i = 0; i < M; ++i) {
-    FLT err = std::max(fabs(d_nonuniform[2 * i] - kersumre),
-                       fabs(d_nonuniform[2 * i + 1] - kersumim));
+    FLT err = std::max(std::fabs(d_nonuniform[2 * i] - kersumre),
+                       std::fabs(d_nonuniform[2 * i + 1] - kersumim));
     if (err > maxerr) maxerr = err;
   }
-  ansmod = sqrt(kersumre * kersumre + kersumim * kersumim);
-  printf("    max rel err in values at NU pts: %.3g\n", maxerr / ansmod);
+  ansmod = std::sqrt(kersumre * kersumre + kersumim * kersumim);
+  std::printf("    max rel err in values at NU pts: %.3g\n", maxerr / ansmod);
   // this is stronger test than for dir=1, since it tests sum of kernel for
   // each NU pt. However, it cannot detect reading
   // from wrong grid pts (they are all unity)
diff --git a/src/c_interface.cpp b/src/c_interface.cpp
index 68fbed44f..91db36c89 100644
--- a/src/c_interface.cpp
+++ b/src/c_interface.cpp
@@ -1,12 +1,11 @@
 // public header
 #include <finufft.h>
 // private headers
+#include <algorithm>
 #include <array>
 #include <cstdio>
 #include <finufft/finufft_core.h> // (must come after complex.h)
 
-using namespace std;
-
 /* ---------------------------------------------------------------------------
    The 18 simple interfaces (= 3 dims * 3 types * {singlecall,many}) to FINUFFT.
    As of v1.2 these simply invoke the guru interface, through a helper layer.
@@ -124,7 +123,7 @@ static int guru(
   }
 
   delete plan;
-  return max(max(ier, ier2), ier3); // in case any one gave a (positive!) warning
+  return std::max(std::max(ier, ier2), ier3); // in case any one gave a (positive!) warning
 }
 template<typename T>
 static int guru13(int n_dims, int type, int n_transf, i64 nj,
diff --git a/src/cuda/deconvolve_wrapper.cu b/src/cuda/deconvolve_wrapper.cu
index 38a4f0da9..e9db57eda 100644
--- a/src/cuda/deconvolve_wrapper.cu
+++ b/src/cuda/deconvolve_wrapper.cu
@@ -1,3 +1,4 @@
+#include <cstdlib>
 #include <iomanip>
 #include <iostream>
 
@@ -24,7 +25,7 @@ __global__ void deconvolve_1d(int ms, int nf1, cuda_complex<T> *fw, cuda_complex
     if (modeord == 0) {
       pivot1    = i - ms / 2;
       w1        = (pivot1 >= 0) ? pivot1 : nf1 + pivot1;
-      fwkerind1 = abs(pivot1);
+      fwkerind1 = std::abs(pivot1);
     } else {
       pivot1    = i - ms + ms / 2;
       w1        = (pivot1 >= 0) ? nf1 + i - ms : i;
@@ -55,8 +56,8 @@ __global__ void deconvolve_2d(int ms, int mt, int nf1, int nf2, cuda_complex<T>
       pivot2    = k2 - mt / 2;
       w1        = (pivot1 >= 0) ? pivot1 : nf1 + pivot1;
       w2        = (pivot2 >= 0) ? pivot2 : nf2 + pivot2;
-      fwkerind1 = abs(pivot1);
-      fwkerind2 = abs(pivot2);
+      fwkerind1 = std::abs(pivot1);
+      fwkerind2 = std::abs(pivot2);
     } else {
       pivot1    = k1 - ms + ms / 2;
       pivot2    = k2 - mt + mt / 2;
@@ -95,9 +96,9 @@ __global__ void deconvolve_3d(int ms, int mt, int mu, int nf1, int nf2, int nf3,
       w1        = (pivot1 >= 0) ? pivot1 : nf1 + pivot1;
       w2        = (pivot2 >= 0) ? pivot2 : nf2 + pivot2;
       w3        = (pivot3 >= 0) ? pivot3 : nf3 + pivot3;
-      fwkerind1 = abs(pivot1);
-      fwkerind2 = abs(pivot2);
-      fwkerind3 = abs(pivot3);
+      fwkerind1 = std::abs(pivot1);
+      fwkerind2 = std::abs(pivot2);
+      fwkerind3 = std::abs(pivot3);
     } else {
       pivot1    = k1 - ms + ms / 2;
       pivot2    = k2 - mt + mt / 2;
@@ -129,7 +130,7 @@ __global__ void amplify_1d(int ms, int nf1, cuda_complex<T> *fw, cuda_complex<T>
     if (modeord == 0) {
       pivot1    = i - ms / 2;
       w1        = (pivot1 >= 0) ? pivot1 : nf1 + pivot1;
-      fwkerind1 = abs(pivot1);
+      fwkerind1 = std::abs(pivot1);
     } else {
       pivot1    = i - ms + ms / 2;
       w1        = (pivot1 >= 0) ? nf1 + i - ms : i;
@@ -160,8 +161,8 @@ __global__ void amplify_2d(int ms, int mt, int nf1, int nf2, cuda_complex<T> *fw
       pivot2    = k2 - mt / 2;
       w1        = (pivot1 >= 0) ? pivot1 : nf1 + pivot1;
       w2        = (pivot2 >= 0) ? pivot2 : nf2 + pivot2;
-      fwkerind1 = abs(pivot1);
-      fwkerind2 = abs(pivot2);
+      fwkerind1 = std::abs(pivot1);
+      fwkerind2 = std::abs(pivot2);
     } else {
       pivot1    = k1 - ms + ms / 2;
       pivot2    = k2 - mt + mt / 2;
@@ -200,9 +201,9 @@ __global__ void amplify_3d(int ms, int mt, int mu, int nf1, int nf2, int nf3,
       w1        = (pivot1 >= 0) ? pivot1 : nf1 + pivot1;
       w2        = (pivot2 >= 0) ? pivot2 : nf2 + pivot2;
       w3        = (pivot3 >= 0) ? pivot3 : nf3 + pivot3;
-      fwkerind1 = abs(pivot1);
-      fwkerind2 = abs(pivot2);
-      fwkerind3 = abs(pivot3);
+      fwkerind1 = std::abs(pivot1);
+      fwkerind2 = std::abs(pivot2);
+      fwkerind3 = std::abs(pivot3);
     } else {
       pivot1    = k1 - ms + ms / 2;
       pivot2    = k2 - mt + mt / 2;
diff --git a/src/cuda/spreadinterp.cpp b/src/cuda/spreadinterp.cpp
index 96210d4ec..5fcfebfcb 100644
--- a/src/cuda/spreadinterp.cpp
+++ b/src/cuda/spreadinterp.cpp
@@ -55,10 +55,10 @@ int setup_spreader(finufft_spread_opts &opts, T eps, T upsampfac,
   }
 
   // Set kernel width w (aka ns) and ES kernel beta parameter, in opts...
-  int ns = std::ceil(-log10(eps / (T)10.0)); // 1 digit per power of ten
+  int ns = std::ceil(-std::log10(eps / (T)10.0)); // 1 digit per power of ten
   if (upsampfac != 2.0)                      // override ns for custom sigma
     ns = std::ceil(
-        -log(eps) / (T(PI) * sqrt(1 - 1 / upsampfac))); // formula,
+        -std::log(eps) / (T(PI) * std::sqrt(1 - 1 / upsampfac))); // formula,
                                                                            // gamma=1
   ns = std::max(2, ns);                      // we don't have ns=1 version yet
   if (ns > MAX_NSPREAD) { // clip to match allocated arrays
diff --git a/src/fft.cpp b/src/fft.cpp
index 33fc8897e..a7021fca9 100644
--- a/src/fft.cpp
+++ b/src/fft.cpp
@@ -1,8 +1,6 @@
 #include <algorithm>
 #include <finufft/fft.h>
 
-using namespace std;
-
 #ifdef FINUFFT_USE_DUCC0
 #include "ducc0/fft/fftnd_impl.h"
 #endif
@@ -22,9 +20,9 @@ template<typename TF>
 void do_fft(const FINUFFT_PLAN_T<TF> &p, std::complex<TF> *fwBatch,
             int ntrans_actual [[maybe_unused]], bool adjoint) {
 #ifdef FINUFFT_USE_DUCC0
-  size_t nthreads = min<size_t>(MY_OMP_GET_MAX_THREADS(), p.opts.nthreads);
+  size_t nthreads = std::min<size_t>(MY_OMP_GET_MAX_THREADS(), p.opts.nthreads);
   const auto ns   = gridsize_for_fft(p);
-  vector<size_t> arrdims, axes;
+  std::vector<size_t> arrdims, axes;
   // ntrans_actual may be smaller than batchSize, which we can use
   // to our advantage with ducc FFT.
   arrdims.push_back(size_t(ntrans_actual));
diff --git a/src/spreadinterp.cpp b/src/spreadinterp.cpp
index 8937dd070..472ac52b1 100644
--- a/src/spreadinterp.cpp
+++ b/src/spreadinterp.cpp
@@ -14,8 +14,6 @@
 #include <cstdio>
 #include <cstdlib>
 #include <vector>
-
-using namespace std;
 using namespace finufft::utils;  // access to timer
 using namespace finufft::common; // access to constants and dispatch
 
@@ -133,7 +131,7 @@ template<class T> uint8_t get_padding(uint8_t ns) {
 template<class T, uint8_t N>
 using BestSIMD = typename decltype(BestSIMDHelper<T, N, xsimd::batch<T>::size>())::type;
 template<class T, class V, size_t... Is>
-constexpr T generate_sequence_impl(V a, V b, index_sequence<Is...>) noexcept {
+constexpr T generate_sequence_impl(V a, V b, std::index_sequence<Is...>) noexcept {
   // utility function to generate a sequence of a, b interleaved as function arguments
   return T(((Is % 2 == 0) ? a : b)...);
 }
@@ -221,7 +219,7 @@ void print_subgrid_info(int ndims, BIGINT offset1, BIGINT offset2, BIGINT offset
 template<typename T>
 static FINUFFT_ALWAYS_INLINE T fold_rescale(const T x, const UBIGINT N) noexcept {
   const T result = x * T(INV_2PI) + T(0.5);
-  return (result - floor(result)) * T(N);
+  return (result - std::floor(result)) * T(N);
 }
 
 template<typename T, uint8_t ns>
@@ -258,11 +256,11 @@ static void evaluate_kernel_vector(T *ker, T *args,
     }
     for (int i = 0; i < Npad; i++) { // Loop 1: Compute exponential arguments
       // care! 1.0 is double...
-      ker[i] = b * (sqrt((T)1.0 - c * args[i] * args[i]) - (T)1.0);
+      ker[i] = b * (std::sqrt((T)1.0 - c * args[i] * args[i]) - (T)1.0);
     }
     if (!(opts.flags & TF_OMIT_EVALUATE_EXPONENTIAL))
       for (int i = 0; i < Npad; i++) // Loop 2: Compute exponentials
-        ker[i] = exp(ker[i]);
+        ker[i] = std::exp(ker[i]);
     if (opts.kerpad) {
       // padded part should be zero, in spread_subproblem_nd_kernels, there are
       // out of bound writes to trg arrays
@@ -274,7 +272,7 @@ static void evaluate_kernel_vector(T *ker, T *args,
   }
   // Separate check from arithmetic (Is this really needed? doesn't slow down)
   for (int i = 0; i < N; i++)
-    if (abs(args[i]) >= (T)opts.ES_halfwidth) ker[i] = 0.0;
+    if (std::abs(args[i]) >= (T)opts.ES_halfwidth) ker[i] = 0.0;
 }
 
 template<typename T, uint8_t w, uint8_t upsampfact,
@@ -2144,9 +2142,9 @@ int setup_spreader(
   if (upsampfac == 2.0)                    // standard sigma (see SISC paper)
     ns = std::ceil(-log10(eps / (T)10.0)); // 1 digit per power of 10
   else                                     // custom sigma
-    ns = std::ceil(-log(eps) / (T(PI) * sqrt(1.0 - 1.0 / upsampfac))); // formula,
+    ns = std::ceil(-std::log(eps) / (T(PI) * std::sqrt(1.0 - 1.0 / upsampfac))); // formula,
                                                                        // gam=1
-  ns = max(2, ns);        // (we don't have ns=1 version yet)
+  ns = std::max(2, ns);   // (we don't have ns=1 version yet)
   if (ns > MAX_NSPREAD) { // clip to fit allocated arrays, Horner rules
     if (showwarn)
       fprintf(stderr,
@@ -2195,11 +2193,12 @@ T evaluate_kernel(T x, const finufft_spread_opts &opts)
    Rescaled so max is 1, Barnett 7/21/24
 */
 {
-  if (abs(x) >= (T)opts.ES_halfwidth)
+  if (std::abs(x) >= (T)opts.ES_halfwidth)
     // if spreading/FT careful, shouldn't need this if, but causes no speed hit
     return 0.0;
   else
-    return exp((T)opts.ES_beta * (sqrt((T)1.0 - (T)opts.ES_c * x * x) - (T)1.0));
+    return std::exp((T)opts.ES_beta *
+                    (std::sqrt((T)1.0 - (T)opts.ES_c * x * x) - (T)1.0));
 }
 
 template float evaluate_kernel<float>(float x, const finufft_spread_opts &opts);
@@ -2212,7 +2211,7 @@ T evaluate_kernel_horner_kernel(T x, const finufft_spread_opts &opts)
    using generated piecewise polynomial approximation to the kernel.
 */
 {
-  if (abs(x) >= (T)opts.ES_halfwidth) {
+  if (std::abs(x) >= (T)opts.ES_halfwidth) {
     // if spreading/FT careful, shouldn't need this if, but causes no speed hit
     return 0.0;
   } else {
diff --git a/test/adjointness.cpp b/test/adjointness.cpp
index 72286ce4b..3089ebe0a 100644
--- a/test/adjointness.cpp
+++ b/test/adjointness.cpp
@@ -1,6 +1,12 @@
 #include "utils/norms.hpp"
 #include <finufft/test_defs.h>
 
+#include <algorithm>
+#include <cmath>
+#include <iostream>
+#include <string>
+#include <vector>
+
 /* Test that execute_adjoint applies the adjoint of execute, in the
    guru interface, for all types and dimensions.
    We use the test_defs macros, as with other C-interface tests.
@@ -22,7 +28,7 @@
    turned out had zero-mean Gaussian pdf (=> fat-tailed pdf of reciprocal, bad!)
 */
 
-using namespace std;
+
 
 int main() {
 
@@ -36,17 +42,17 @@ int main() {
 #ifdef SINGLE
   FLT tol        = 1e-6; // requested transform tol (small enough to force USF=2)
   FLT allowederr = 1e-4; // ~1e3*epsmach (allow USF=1.25 larger r_dyn)
-  string name    = "adjointnessf";
+  std::string name    = "adjointnessf";
 #else
   FLT tol        = 1e-12; // requested transform tol (eps<=1e-9 => USF=2 guaranteed)
   FLT allowederr = 1e-10; // ~1e6*epsmach (USF=2 r_dyn^3<1e3, but allow USF=1.25)
-  string name    = "adjointness";
+  std::string name    = "adjointness";
 #endif
 
-  cout << "adjointness: making random data...";
+  std::cout << "adjointness: making random data...";
   // generate random non-uniform points on (x,y) and complex strengths (c)
-  vector<FLT> x(M), y(M), z(M);
-  vector<CPX> c(M);
+  std::vector<FLT> x(M), y(M), z(M);
+  std::vector<CPX> c(M);
   for (int i = 0; i < M; i++) {
     x[i] = PI * randm11(); // unif random in [-pi, pi)
     y[i] = PI * randm11();
@@ -56,12 +62,12 @@ int main() {
 
   // generate random mode coeffs (f), enough for any dim up to 3
   BIGINT Nmax = Ns[0] * Ns[1] * Ns[2];
-  vector<CPX> f(Nmax);
+  std::vector<CPX> f(Nmax);
   for (int i = 0; i < Nmax; i++) f[i] = crandm11();
-  cout << " done" << endl;
+  std::cout << " done" << std::endl;
 
   // generate random freq targs for type 3 only (t3 always uses Nmax targs)
-  vector<FLT> s(Nmax), t(Nmax), u(Nmax);
+  std::vector<FLT> s(Nmax), t(Nmax), u(Nmax);
   for (int i = 0; i < Nmax; i++) { // space-bandwidth prod O(Nd) for dim d
     s[i] = Ns[0] / 2 * randm11();  // unif random in [-N1/2,N1/2]
     t[i] = Ns[1] / 2 * randm11();
@@ -69,18 +75,18 @@ int main() {
   }
 
   // allocate output arrays for adjoint testing (Capital denotes output)
-  vector<CPX> C(M);
-  vector<CPX> F(Nmax);
+  std::vector<CPX> C(M);
+  std::vector<CPX> F(Nmax);
   FLT errmax = 0.0; // track worst errors across tests
   int ier, ieradj, iermax = 0;
 
   for (int dim = 1; dim <= 3; ++dim) { // ....... loop over dims
     BIGINT N = 1;                      // compute actual num modes in this dim
     for (int d = 0; d < dim; ++d) N *= Ns[d];
-    cout << "  dim=" << dim << ", M=" << M << " pts, N=" << N << " modes:" << endl;
+    std::cout << "  dim=" << dim << ", M=" << M << " pts, N=" << N << " modes:" << std::endl;
 
     for (int type = 1; type <= 3; ++type) { // .......... loop over types
-      cout << "\ttype " << type << ": ";
+      std::cout << "\ttype " << type << ": ";
       FINUFFT_PLAN plan;
       FINUFFT_MAKEPLAN(type, dim, Ns, isign, ntr, tol, &plan, &opts);
       // always input NU pts and freq targs (latter only used by t3)...
@@ -93,33 +99,35 @@ int main() {
         ier    = FINUFFT_EXECUTE(plan, C.data(), f.data()); // f->C
         ieradj = FINUFFT_EXECUTE_ADJOINT(plan, c.data(), F.data()); // c->F
       }
-      if (ier > 0) cout << "\texecute failure: ier=" << ier << endl;
-      if (ieradj > 0) cout << "\texecute_adjoint failure: ier=" << ieradj << endl;
-      iermax = max(max(ier, ieradj), iermax); // track if something failed
+      if (ier > 0) std::cout << "\texecute failure: ier=" << ier << std::endl;
+      if (ieradj > 0) std::cout << "\texecute_adjoint failure: ier=" << ieradj
+                << std::endl;
+      iermax = std::max(std::max(ier, ieradj), iermax); // track if something failed
       FINUFFT_DESTROY(plan);
 
       // measure scalar error (f,F) - (C,c), should vanish by adjointness...
       CPX ipc = 0.0, ipf = 0.0;           // inner-prod results for (C,c) and (f,F)
-      for (int i = 0; i < M; i++) ipc += conj(C[i]) * c[i];
+      for (int i = 0; i < M; i++) ipc += std::conj(C[i]) * c[i];
       int Nused = (type == 3) ? Nmax : N; // how many modes or freqs used
-      for (int j = 0; j < Nused; j++) ipf += conj(f[j]) * F[j];
+      for (int j = 0; j < Nused; j++) ipf += std::conj(f[j]) * F[j];
 
       // denominator for rel error (twonorm in utils.hpp), see discussion at top:
       // FLT denom = twonorm(M,C.data()) * twonorm(M,c.data()) / sqrt(M);  // v sim
-      FLT denom = twonorm(Nused, F.data()) * twonorm(Nused, f.data()) / sqrt(Nused);
-      FLT err   = abs(ipc - ipf) / denom;     // scale rel to norms of vectors in ipc
-      cout << " adj rel err " << err << endl; // "\t denom=" << denom << endl;
-      errmax = max(err, errmax);
+      FLT denom =
+          twonorm(Nused, F.data()) * twonorm(Nused, f.data()) / std::sqrt(Nused);
+      FLT err   = std::abs(ipc - ipf) / denom;     // scale rel to norms of vectors in ipc
+      std::cout << " adj rel err " << err << std::endl; // "\t denom=" << denom << endl;
+      errmax = std::max(err, errmax);
     }
   }
 
   // report and exit code
   if (errmax > allowederr || iermax > 0) {
-    cout << name << " failed! (allowederr=" << allowederr << ", iermax=" << iermax << ")"
-         << endl;
+    std::cout << name << " failed! (allowederr=" << allowederr << ", iermax=" << iermax
+              << ")" << std::endl;
     return 1;
   } else {
-    cout << name << " passed" << endl;
+    std::cout << name << " passed" << std::endl;
     return 0;
   }
 }
diff --git a/test/basicpassfail.cpp b/test/basicpassfail.cpp
index ce23637db..af8d6cd8a 100644
--- a/test/basicpassfail.cpp
+++ b/test/basicpassfail.cpp
@@ -1,3 +1,5 @@
+#include <cmath>
+
 #include <finufft/test_defs.h>
 
 // Basic pass-fail test of one routine in library w/ default opts.
@@ -32,14 +34,15 @@ int main() {
   // Check correct math for a single mode...................
   BIGINT n  = (BIGINT)(0.37 * N); // choose some mode near the top (N/2)
   CPX Ftest = CPX(0.0, 0.0);      // crude exact answer & error check...
-  for (BIGINT j = 0; j < M; ++j) Ftest += c[j] * exp((FLT)isign * I * (FLT)n * x[j]);
+  for (BIGINT j = 0; j < M; ++j)
+    Ftest += c[j] * std::exp((FLT)isign * I * (FLT)n * x[j]);
   BIGINT nout = n + N / 2;        // index in output array for freq mode n
   FLT Finfnrm = 0.0;              // compute inf norm of F...
   for (int m = 0; m < N; ++m) {
-    FLT aF = abs(F[m]);           // note C++ abs complex type, not C fabs(f)
+    FLT aF = std::abs(F[m]);           // note C++ abs complex type, not C fabs(f)
     if (aF > Finfnrm) Finfnrm = aF;
   }
-  FLT relerr = abs(F[nout] - Ftest) / Finfnrm;
+  FLT relerr = std::abs(F[nout] - Ftest) / Finfnrm;
   // printf("requested tol %.3g: rel err for one mode %.3g\n",tol,relerr);
   return (std::isnan(relerr) || relerr > 10.0 * tol); // true reports failure
 }
diff --git a/test/cuda/cufinufft1d_test.cu b/test/cuda/cufinufft1d_test.cu
index 423f7af3a..f4a735b19 100644
--- a/test/cuda/cufinufft1d_test.cu
+++ b/test/cuda/cufinufft1d_test.cu
@@ -8,8 +8,8 @@
 #include <thrust/device_vector.h>
 #include <thrust/host_vector.h>
 
-#include <finufft_common/common.h>
 #include <cufinufft.h>
+#include <finufft_common/common.h>
 
 #include "../utils/dirft1d.hpp"
 #include "../utils/norms.hpp"
@@ -187,9 +187,10 @@ int run_test(int method, int type, int N1, int M, T tol, T checktol, int iflag,
   if (type == 1) {
     int nt1                = 0.37 * N1; // choose some mode index to check
     thrust::complex<T> Ftp = thrust::complex<T>(0, 0), J = thrust::complex<T>(0.0, iflag);
-    for (int j = 0; j < M; ++j) Ftp += c[j] * exp(J * (nt1 * x[j])); // crude direct
-    int it    = N1 / 2 + nt1; // index in complex F as 1d array
-    rel_error = abs(Ftp - fk[it]) / infnorm(N1, fk);
+    for (int j = 0; j < M; ++j)
+      Ftp += c[j] * thrust::exp(J * (nt1 * x[j])); // crude direct
+    int it    = N1 / 2 + nt1;                      // index in complex F as 1d array
+    rel_error = thrust::abs(Ftp - fk[it]) / infnorm(N1, fk);
     printf("[gpu   ] one mode: rel err in F[%d] is %.3g\n", nt1, rel_error);
     if (static_cast<double>(M) * N1 <= TEST_BIGPROB) {
       // also full direct eval
@@ -205,8 +206,8 @@ int run_test(int method, int type, int N1, int M, T tol, T checktol, int iflag,
     thrust::complex<T> ctp = thrust::complex<T>(0, 0);
     int m                  = 0;
     for (int m1 = -(N1 / 2); m1 <= (N1 - 1) / 2; ++m1)
-      ctp += fk[m++] * exp(J * (m1 * x[jt])); // crude direct
-    rel_error = abs(c[jt] - ctp) / infnorm(M, (std::complex<T> *)c.data());
+      ctp += fk[m++] * thrust::exp(J * (m1 * x[jt])); // crude direct
+    rel_error = thrust::abs(c[jt] - ctp) / infnorm(M, (std::complex<T> *)c.data());
     printf("[gpu   ] one targ: rel err in c[%d] is %.3g\n", jt, rel_error);
     if (static_cast<double>(M) * N1 <= TEST_BIGPROB) {
       std::vector<thrust::complex<T>> ct(M);
@@ -222,9 +223,9 @@ int run_test(int method, int type, int N1, int M, T tol, T checktol, int iflag,
     thrust::complex<T> Ftp = thrust::complex<T>(0, 0);
 
     for (int j = 0; j < M; ++j) {
-      Ftp += c[j] * exp(J * (x[j] * s[jt]));
+      Ftp += c[j] * thrust::exp(J * (x[j] * s[jt]));
     }
-    rel_error = abs(Ftp - fk[jt]) / infnorm(N1, (std::complex<T> *)fk.data());
+    rel_error = thrust::abs(Ftp - fk[jt]) / infnorm(N1, (std::complex<T> *)fk.data());
     printf("[gpu   ] one mode: rel err in F[%d] is %.3g\n", jt, rel_error);
     if (static_cast<double>(M) * N1 <= TEST_BIGPROB) {
       std::vector<thrust::complex<T>> Ft(N1);
diff --git a/test/cuda/cufinufft1dspreadinterponly_test.cu b/test/cuda/cufinufft1dspreadinterponly_test.cu
index 438032bad..ceb51f0a1 100644
--- a/test/cuda/cufinufft1dspreadinterponly_test.cu
+++ b/test/cuda/cufinufft1dspreadinterponly_test.cu
@@ -4,8 +4,8 @@
 #include <iostream>
 #include <random>
 
-#include <finufft_common/common.h>
 #include <cufinufft.h>
+#include <finufft_common/common.h>
 
 #include <cufinufft/contrib/helper_cuda.h>
 #include <cufinufft/impl.h>
@@ -185,7 +185,7 @@ int run_test(int N1, int M, T tol, T checktol, int iflag, double upsampfac) {
 
   csum         = std::accumulate(c.begin(), c.end(), thrust::complex<T>(T(0), T(0)));
   auto sup_err = T(0.0);
-  for (auto cj : c) sup_err = std::max(sup_err, abs(cj - kersum));
+  for (auto cj : c) sup_err = std::max(sup_err, thrust::abs(cj - kersum));
   const auto rel_sup_err = sup_err / thrust::abs(kersum);
   printf("\trel sup err %.3g\n", rel_sup_err);
 
diff --git a/test/cuda/cufinufft2d1nupts_test.cu b/test/cuda/cufinufft2d1nupts_test.cu
index 9f1151db2..cc101d9d2 100644
--- a/test/cuda/cufinufft2d1nupts_test.cu
+++ b/test/cuda/cufinufft2d1nupts_test.cu
@@ -7,8 +7,8 @@
 #include <iostream>
 #include <random>
 
-#include <finufft_common/common.h>
 #include <cufinufft.h>
+#include <finufft_common/common.h>
 
 #include <cufinufft/impl.h>
 #include <cufinufft/utils.h>
@@ -194,16 +194,16 @@ template<typename T> int run_test(int method) {
   int nt1 = (int)(0.37 * N1), nt2 = (int)(0.26 * N2); // choose some mode index to check
   thrust::complex<T> Ft(0, 0), J(0, iflag);
   for (int j = 0; j < M1; ++j)
-    Ft += c1[j] * exp(J * (nt1 * x1[j] + nt2 * y1[j])); // crude direct
-  int it = N1 / 2 + nt1 + N1 * (N2 / 2 + nt2);          // index in complex F as 1d array
+    Ft += c1[j] * thrust::exp(J * (nt1 * x1[j] + nt2 * y1[j])); // crude direct
+  int it = N1 / 2 + nt1 + N1 * (N2 / 2 + nt2); // index in complex F as 1d array
 
   printf("[gpu   ] one mode: rel err in F[%d,%d] is %.3g (set 1)\n", (int)nt1, (int)nt2,
-         abs(Ft - fk1[it]) / infnorm(N, (std::complex<T> *)fk1.data()));
+         thrust::abs(Ft - fk1[it]) / infnorm(N, (std::complex<T> *)fk1.data()));
   Ft = thrust::complex<T>(0, 0);
   for (int j = 0; j < M2; ++j)
-    Ft += c2[j] * exp(J * (nt1 * x2[j] + nt2 * y2[j])); // crude direct
+    Ft += c2[j] * thrust::exp(J * (nt1 * x2[j] + nt2 * y2[j])); // crude direct
   printf("[gpu   ] one mode: rel err in F[%d,%d] is %.3g (set 2)\n", (int)nt1, (int)nt2,
-         abs(Ft - fk2[it]) / infnorm(N, (std::complex<T> *)fk2.data()));
+         thrust::abs(Ft - fk2[it]) / infnorm(N, (std::complex<T> *)fk2.data()));
 
   return 0;
 }
diff --git a/test/cuda/cufinufft2d_test.cu b/test/cuda/cufinufft2d_test.cu
index c7d812226..4126c0ce8 100644
--- a/test/cuda/cufinufft2d_test.cu
+++ b/test/cuda/cufinufft2d_test.cu
@@ -6,8 +6,8 @@
 #include <limits>
 #include <random>
 
-#include <finufft_common/common.h>
 #include <cufinufft.h>
+#include <finufft_common/common.h>
 
 #include <cufinufft/impl.h>
 #include <cufinufft/utils.h>
@@ -181,10 +181,10 @@ int run_test(int method, int type, int N1, int N2, int M, T tol, T checktol, int
     const int nt2         = 0.26 * N2; // choose some mode index to check
     thrust::complex<T> Ft = thrust::complex<T>(0, 0), J = thrust::complex<T>(0.0, iflag);
     for (int j = 0; j < M; ++j)
-      Ft += c[j] * exp(J * (nt1 * x[j] + nt2 * y[j])); // crude direct
-    const int it = N1 / 2 + nt1 + N1 * (N2 / 2 + nt2); // index in complex F as 1d
-                                                       // array
-    rel_error = abs(Ft - fk[it]) / infnorm(N1, (std::complex<T> *)fk.data());
+      Ft += c[j] * thrust::exp(J * (nt1 * x[j] + nt2 * y[j])); // crude direct
+    const int it = N1 / 2 + nt1 + N1 * (N2 / 2 + nt2);         // index in complex F as 1d
+                                                               // array
+    rel_error = thrust::abs(Ft - fk[it]) / infnorm(N1, (std::complex<T> *)fk.data());
     printf("[gpu   ] one mode: rel err in F[%d,%d] is %.3g\n", nt1, nt2, rel_error);
     if (type == 1 && static_cast<int64_t>(M) * N1 * N2 <= TEST_BIGPROB) {
       std::vector<thrust::complex<T>> Ft(N1 * N2);
@@ -201,9 +201,9 @@ int run_test(int method, int type, int N1, int N2, int M, T tol, T checktol, int
     int m = 0;
     for (int m2 = -(N2 / 2); m2 <= (N2 - 1) / 2; ++m2) // loop in correct order over F
       for (int m1 = -(N1 / 2); m1 <= (N1 - 1) / 2; ++m1)
-        ct += fk[m++] * exp(J * (m1 * x[jt] + m2 * y[jt])); // crude direct
+        ct += fk[m++] * thrust::exp(J * (m1 * x[jt] + m2 * y[jt])); // crude direct
 
-    rel_error = abs(c[jt] - ct) / infnorm(M, (std::complex<T> *)c.data());
+    rel_error = thrust::abs(c[jt] - ct) / infnorm(M, (std::complex<T> *)c.data());
     printf("[gpu   ] one targ: rel err in c[%d] is %.3g\n", jt, rel_error);
     if (type == 2 && static_cast<int64_t>(M) * N1 * N2 <= TEST_BIGPROB) {
       std::vector<thrust::complex<T>> ct(M);
@@ -219,9 +219,9 @@ int run_test(int method, int type, int N1, int N2, int M, T tol, T checktol, int
     thrust::complex<T> Ft = thrust::complex<T>(0, 0);
 
     for (int j = 0; j < M; ++j) {
-      Ft += c[j] * exp(J * (x[j] * s[jt] + y[j] * t[jt]));
+      Ft += c[j] * thrust::exp(J * (x[j] * s[jt] + y[j] * t[jt]));
     }
-    rel_error = abs(Ft - fk[jt]) / infnorm(N1 * N2, (std::complex<T> *)fk.data());
+    rel_error = thrust::abs(Ft - fk[jt]) / infnorm(N1 * N2, (std::complex<T> *)fk.data());
     printf("[gpu   ] one mode: rel err in F[%d] is %.3g\n", jt, rel_error);
     if (type == 3 && static_cast<int64_t>(M) * N1 * N2 <= TEST_BIGPROB) {
       std::vector<thrust::complex<T>> Ft(N1 * N2);
diff --git a/test/cuda/cufinufft2dmany_test.cu b/test/cuda/cufinufft2dmany_test.cu
index 3f3c24a82..8bcd72281 100644
--- a/test/cuda/cufinufft2dmany_test.cu
+++ b/test/cuda/cufinufft2dmany_test.cu
@@ -6,8 +6,8 @@
 #include <limits>
 #include <random>
 
-#include <finufft_common/common.h>
 #include <cufinufft.h>
+#include <finufft_common/common.h>
 
 #include <cufinufft/impl.h>
 #include <cufinufft/utils.h>
@@ -179,10 +179,10 @@ int run_test(int method, int type, int N1, int N2, int ntransf, int maxbatchsize
                                                         // check
     thrust::complex<T> Ft = thrust::complex<T>(0, 0), J = thrust::complex<T>(0.0, iflag);
     for (int j = 0; j < M; ++j)
-      Ft += c[j + i * M] * exp(J * (nt1 * x[j] + nt2 * y[j])); // crude direct
-    int it = N1 / 2 + nt1 + N1 * (N2 / 2 + nt2); // index in complex F as 1d array
-    rel_error =
-        abs(Ft - fk[it + i * N]) / infnorm(N1, (std::complex<T> *)fk.data() + i * N);
+      Ft += c[j + i * M] * thrust::exp(J * (nt1 * x[j] + nt2 * y[j])); // crude direct
+    int it    = N1 / 2 + nt1 + N1 * (N2 / 2 + nt2); // index in complex F as 1d array
+    rel_error = thrust::abs(Ft - fk[it + i * N]) /
+                infnorm(N1, (std::complex<T> *)fk.data() + i * N);
     printf("[gpu   ] %dth data one mode: rel err in F[%d,%d] is %.3g\n", i, nt1, nt2,
            rel_error);
   } else if (type == 2) {
@@ -195,9 +195,9 @@ int run_test(int method, int type, int N1, int N2, int ntransf, int maxbatchsize
     int m = 0;
     for (int m2 = -(N2 / 2); m2 <= (N2 - 1) / 2; ++m2) // loop in correct order over F
       for (int m1 = -(N1 / 2); m1 <= (N1 - 1) / 2; ++m1)
-        ct += fkstart[m++] * exp(J * (m1 * x[jt] + m2 * y[jt])); // crude direct
+        ct += fkstart[m++] * thrust::exp(J * (m1 * x[jt] + m2 * y[jt])); // crude direct
 
-    rel_error = abs(cstart[jt] - ct) / infnorm(M, (std::complex<T> *)c.data());
+    rel_error = thrust::abs(cstart[jt] - ct) / infnorm(M, (std::complex<T> *)c.data());
     printf("[gpu   ] %dth data one targ: rel err in c[%d] is %.3g\n", t, jt, rel_error);
   } else if (type == 3) {
     int jt                      = (N1 * N2) / 2; // check arbitrary choice of one targ pt
@@ -207,9 +207,10 @@ int run_test(int method, int type, int N1, int N2, int ntransf, int maxbatchsize
     const thrust::complex<T> *cstart = c.data() + (ntransf - 1) * M;
 
     for (int j = 0; j < M; ++j) {
-      Ft += cstart[j] * exp(J * (x[j] * s[jt] + y[j] * t[jt]));
+      Ft += cstart[j] * thrust::exp(J * (x[j] * s[jt] + y[j] * t[jt]));
     }
-    rel_error = abs(Ft - fkstart[jt]) / infnorm(N1 * N2, (std::complex<T> *)fk.data());
+    rel_error =
+        thrust::abs(Ft - fkstart[jt]) / infnorm(N1 * N2, (std::complex<T> *)fk.data());
     printf("[gpu   ] one mode: rel err in F[%d] is %.3g\n", jt, rel_error);
   }
 
diff --git a/test/cuda/cufinufft3d_test.cu b/test/cuda/cufinufft3d_test.cu
index 929cba767..4cfd82ab6 100644
--- a/test/cuda/cufinufft3d_test.cu
+++ b/test/cuda/cufinufft3d_test.cu
@@ -5,8 +5,8 @@
 #include <iostream>
 #include <random>
 
-#include <finufft_common/common.h>
 #include <cufinufft.h>
+#include <finufft_common/common.h>
 
 #include <cufinufft/impl.h>
 #include <cufinufft/utils.h>
@@ -189,14 +189,15 @@ int run_test(int method, int type, int N1, int N2, int N3, int M, T tol, T check
         nt3               = (int)(0.13 * N3); // choose some mode index to check
     thrust::complex<T> Ft = thrust::complex<T>(0, 0), J = thrust::complex<T>(0.0, iflag);
     for (int j = 0; j < M; ++j)
-      Ft += c[j] * exp(J * (nt1 * x[j] + nt2 * y[j] + nt3 * z[j])); // crude direct
+      Ft += c[j] * thrust::exp(J * (nt1 * x[j] + nt2 * y[j] + nt3 * z[j])); // crude
+                                                                            // direct
 
     int it = N1 / 2 + nt1 + N1 * (N2 / 2 + nt2) + N1 * N2 * (N3 / 2 + nt3); // index
                                                                             // in
                                                                             // complex
                                                                             // F as 1d
                                                                             // array
-    rel_error = abs(Ft - fk[it]) / infnorm(N1, (std::complex<T> *)fk.data());
+    rel_error = thrust::abs(Ft - fk[it]) / infnorm(N1, (std::complex<T> *)fk.data());
     printf("[gpu   ] one mode: rel err in F[%d,%d,%d] is %.3g\n", nt1, nt2, nt3,
            rel_error);
     if (static_cast<int64_t>(M) * N1 * N2 * N3 <= TEST_BIGPROB) {
@@ -216,9 +217,11 @@ int run_test(int method, int type, int N1, int N2, int N3, int M, T tol, T check
       for (int m2 = -(N2 / 2); m2 <= (N2 - 1) / 2; ++m2) // loop in correct order
                                                          // over F
         for (int m1 = -(N1 / 2); m1 <= (N1 - 1) / 2; ++m1)
-          ct += fk[m++] * exp(J * (m1 * x[jt] + m2 * y[jt] + m3 * z[jt])); // crude direct
+          ct +=
+              fk[m++] * thrust::exp(J * (m1 * x[jt] + m2 * y[jt] + m3 * z[jt])); // crude
+                                                                                 // direct
 
-    rel_error = abs(c[jt] - ct) / infnorm(M, (std::complex<T> *)c.data());
+    rel_error = thrust::abs(c[jt] - ct) / infnorm(M, (std::complex<T> *)c.data());
     printf("[gpu   ] one targ: rel err in c[%ld] is %.3g\n", (int64_t)jt, rel_error);
     if (static_cast<int64_t>(M) * N1 * N2 * N3 <= TEST_BIGPROB) {
       std::vector<thrust::complex<T>> ct(M);
@@ -234,9 +237,10 @@ int run_test(int method, int type, int N1, int N2, int N3, int M, T tol, T check
     thrust::complex<T> Ft = thrust::complex<T>(0, 0);
 
     for (int j = 0; j < M; ++j) {
-      Ft += c[j] * exp(J * (x[j] * s[jt] + y[j] * t[jt] + z[j] * u[jt]));
+      Ft += c[j] * thrust::exp(J * (x[j] * s[jt] + y[j] * t[jt] + z[j] * u[jt]));
     }
-    rel_error = abs(Ft - fk[jt]) / infnorm(N1 * N2 * N3, (std::complex<T> *)fk.data());
+    rel_error =
+        thrust::abs(Ft - fk[jt]) / infnorm(N1 * N2 * N3, (std::complex<T> *)fk.data());
     printf("[gpu   ] one mode: rel err in F[%d] is %.3g\n", jt, rel_error);
     if (static_cast<int64_t>(M) * N1 * N2 * N3 <= TEST_BIGPROB) {
       std::vector<thrust::complex<T>> Ft(N1 * N2 * N3);
diff --git a/test/dumbinputs.cpp b/test/dumbinputs.cpp
index 866458c86..498d483fe 100644
--- a/test/dumbinputs.cpp
+++ b/test/dumbinputs.cpp
@@ -37,9 +37,10 @@
 #include "utils/dirft1d.hpp"
 #include "utils/dirft2d.hpp"
 #include "utils/norms.hpp"
+#include <cmath>
 #include <finufft/test_defs.h>
 
-using namespace std;
+
 
 int main(int argc, char *argv[]) {
   int M = 100;    // number of nonuniform points
@@ -57,13 +58,13 @@ int main(int argc, char *argv[]) {
   FLT *x = (FLT *)malloc(sizeof(FLT) * M);
   CPX *c = (CPX *)malloc(sizeof(CPX) * M);
   for (int j = 0; j < M; ++j) {
-    x[j] = PI * cos((FLT)j); // deterministic
-    c[j] = sin((FLT)1.3 * j) + IMA * cos((FLT)0.9 * j);
+    x[j] = PI * std::cos((FLT)j); // deterministic
+    c[j] = std::sin((FLT)1.3 * j) + IMA * std::cos((FLT)0.9 * j);
   }
   // allocate output array F for Fourier modes, fix some type-3 coords...
   CPX *F = (CPX *)malloc(sizeof(CPX) * NN);
   FLT *s = (FLT *)malloc(sizeof(FLT) * N);
-  for (int k = 0; k < N; ++k) s[k] = 10 * cos(1.2 * k); // normal-sized coords
+  for (int k = 0; k < N; ++k) s[k] = 10 * std::cos(1.2 * k); // normal-sized coords
   FLT *shuge = (FLT *)malloc(sizeof(FLT) * N);
   FLT huge   = 1e12;                                    // no smaller than MAX_NF
   for (int k = 0; k < N; ++k) shuge[k] = huge * s[k];   // some huge coords
@@ -116,7 +117,7 @@ int main(int argc, char *argv[]) {
     return 1;
   }
   for (int k = 0; k < NN; ++k)
-    F[k] = sin((FLT)0.7 * k) + IMA * cos((FLT)0.3 * k); // set F for t2
+    F[k] = std::sin((FLT)0.7 * k) + IMA * std::cos((FLT)0.3 * k); // set F for t2
   ier = FINUFFT1D2(M, x, c, +1, 0, N, F, &opts);
   if (ier != FINUFFT_WARN_EPS_TOO_SMALL) {
     printf("1d2 tol=0:\twrong err code %d\n", ier);
@@ -134,7 +135,7 @@ int main(int argc, char *argv[]) {
     return ier;
   }
   for (int j = 0; j < M; ++j)
-    c[j] = sin((FLT)1.3 * j) + IMA * cos((FLT)0.9 * j); // reset c for t3
+    c[j] = std::sin((FLT)1.3 * j) + IMA * std::cos((FLT)0.9 * j); // reset c for t3
   ier = FINUFFT1D3(M, x, c, +1, 0, N, s, F, &opts);
   if (ier != FINUFFT_WARN_EPS_TOO_SMALL) {
     printf("1d3 tol=0:\twrong err code %d\n", ier);
@@ -165,21 +166,21 @@ int main(int argc, char *argv[]) {
   ier = FINUFFT1D3(1, x, c, +1, acc, N, s, F, &opts); // XK prod formally 0
   dirft1d3(1, x, c, +1, N, s, Fe);
   for (int k = 0; k < N; ++k) F[k] -= Fe[k];          // acc chk
-  FLT err = twonorm(N, F) / sqrt((FLT)N);
+  FLT err = twonorm(N, F) / std::sqrt((FLT)N);
   if (ier || err > 100 * acc) {
     printf("1d3 M=1:\tier=%d nrm(err)=%.3g\n", ier, err);
     return 1;
   }
   ier = FINUFFT1D3(M, x, c, +1, acc, 1, s, F, &opts);
   dirft1d3(M, x, c, +1, 1, s, Fe);
-  err = abs(F[0] - Fe[0]);
+  err = std::abs(F[0] - Fe[0]);
   if (ier || err > 10 * acc) {
     printf("1d3 nk=1:\tier=%d err=%.3g\n", ier, err);
     return 1;
   }
   ier = FINUFFT1D3(1, x, c, +1, acc, 1, s, F, &opts);
   dirft1d3(1, x, c, +1, 1, s, Fe);
-  err = abs(F[0] - Fe[0]);
+  err = std::abs(F[0] - Fe[0]);
   if (ier || err > 10 * acc) {
     printf("1d3 M=nk=1:\tier=%d err=%.3g\n", ier, err);
     return 1;
@@ -193,7 +194,7 @@ int main(int argc, char *argv[]) {
   CPX *cm   = (CPX *)malloc(sizeof(CPX) * M * ndata);
   CPX *Fm   = (CPX *)malloc(sizeof(CPX) * NN * ndata);   // the biggest array
   for (int j = 0; j < M * ndata; ++j)
-    cm[j] = sin((FLT)1.3 * j) + IMA * cos((FLT)0.9 * j); // set cm for 1d1many
+    cm[j] = std::sin((FLT)1.3 * j) + IMA * std::cos((FLT)0.9 * j); // set cm for 1d1many
   ier = FINUFFT1D1MANY(0, M, x, cm, +1, 0, N, Fm, &opts);
   if (ier != FINUFFT_ERR_NTRANS_NOTVALID) {
     printf("1d1many ndata=0:\twrong err code %d\n", ier);
@@ -216,7 +217,7 @@ int main(int argc, char *argv[]) {
     return 1;
   }
   for (int k = 0; k < NN * ndata; ++k)
-    Fm[k] = sin((FLT)0.7 * k) + IMA * cos((FLT)0.3 * k); // set Fm for 1d2many
+    Fm[k] = std::sin((FLT)0.7 * k) + IMA * std::cos((FLT)0.3 * k); // set Fm for 1d2many
   ier = FINUFFT1D2MANY(0, M, x, cm, +1, 0, N, Fm, &opts);
   if (ier != FINUFFT_ERR_NTRANS_NOTVALID) {
     printf("1d2many ndata=0:\twrong err code %d\n", ier);
@@ -239,7 +240,7 @@ int main(int argc, char *argv[]) {
     return ier;
   }
   for (int j = 0; j < M * ndata; ++j)
-    cm[j] = sin((FLT)1.3 * j) + IMA * cos((FLT)0.9 * j); // reset cm for 1d3many
+    cm[j] = std::sin((FLT)1.3 * j) + IMA * std::cos((FLT)0.9 * j); // reset cm for 1d3many
   ier = FINUFFT1D3MANY(0, M, x, cm, +1, acc, N, s, Fm, &opts);
   if (ier != FINUFFT_ERR_NTRANS_NOTVALID) {
     printf("1d3many ndata=0:\twrong err code %d\n", ier);
@@ -261,21 +262,21 @@ int main(int argc, char *argv[]) {
   ier = FINUFFT1D3MANY(ndata, 1, x, cm, +1, acc, N, s, Fm, &opts); // XK prod formally 0
   dirft1d3(1, x, c, +1, N, s, Fe);
   for (int k = 0; k < N; ++k) Fm[k] -= Fe[k];                      // acc chk
-  err = twonorm(N, Fm) / sqrt((FLT)N); // rms, to 5e-5 abs; check just first trial
+  err = twonorm(N, Fm) / std::sqrt((FLT)N); // rms, to 5e-5 abs; check just first trial
   if (ier || err > 100 * acc) {
     printf("1d3many M=1:\tier=%d nrm(err)=%.3g\n", ier, err);
     return 1;
   }
   ier = FINUFFT1D3MANY(ndata, M, x, cm, +1, acc, 1, s, Fm, &opts);
   dirft1d3(M, x, c, +1, 1, s, Fe);
-  err = abs(Fm[0] - Fe[0]);
+  err = std::abs(Fm[0] - Fe[0]);
   if (ier || err > 10 * acc) {
     printf("1d3many nk=1:\tier=%d err=%.3g\n", ier, err);
     return 1;
   }
   ier = FINUFFT1D3MANY(ndata, 1, x, cm, +1, acc, 1, s, Fm, &opts);
   dirft1d3(1, x, c, +1, 1, s, Fe);
-  err = abs(Fm[0] - Fe[0]);
+  err = std::abs(Fm[0] - Fe[0]);
   if (ier || err > 10 * acc) {
     printf("1d3many M=nk=1:\tier=%d err=%.3g\n", ier, err);
     return 1;
@@ -315,7 +316,7 @@ int main(int argc, char *argv[]) {
     return 1;
   }
   for (int k = 0; k < NN; ++k)
-    F[k] = sin((FLT)0.7 * k) + IMA * cos((FLT)0.3 * k); // set F for t2
+    F[k] = std::sin((FLT)0.7 * k) + IMA * std::cos((FLT)0.3 * k); // set F for t2
   ier = FINUFFT2D2(M, x, x, c, +1, 0, N, N, F, &opts);
   if (ier != FINUFFT_WARN_EPS_TOO_SMALL) {
     printf("2d2 tol=0:\twrong err code %d\n", ier);
@@ -345,7 +346,7 @@ int main(int argc, char *argv[]) {
     return ier;
   }
   for (int j = 0; j < M; ++j)
-    c[j] = sin((FLT)1.3 * j) + IMA * cos((FLT)0.9 * j); // reset c for t3
+    c[j] = std::sin((FLT)1.3 * j) + IMA * std::cos((FLT)0.9 * j); // reset c for t3
   ier = FINUFFT2D3(M, x, x, c, +1, 0, N, s, s, F, &opts);
   if (ier != FINUFFT_WARN_EPS_TOO_SMALL) {
     printf("2d3 tol=0:\twrong err code %d\n", ier);
@@ -368,14 +369,14 @@ int main(int argc, char *argv[]) {
     printf("2d3 M=nk=1:\tier=%d\n", ier);
     return ier;
   }
-  for (int k = 0; k < N; ++k) shuge[k] = sqrt(huge) * s[k]; // less huge coords
+  for (int k = 0; k < N; ++k) shuge[k] = std::sqrt(huge) * s[k]; // less huge coords
   ier = FINUFFT2D3(M, x, x, c, +1, acc, N, shuge, shuge, F, &opts);
   if (ier == 0) { // any nonzero code accepted here
     printf("2d3 XK prod too big:\twrong error code %d\n", ier);
     return 1;
   }
   for (int j = 0; j < M * ndata; ++j)
-    cm[j] = sin((FLT)1.3 * j) + IMA * cos((FLT)0.9 * j); // reset cm for 2d1many
+    cm[j] = std::sin((FLT)1.3 * j) + IMA * std::cos((FLT)0.9 * j); // reset cm for 2d1many
   ier = FINUFFT2D1MANY(0, M, x, x, cm, +1, 0, N, N, Fm, &opts);
   if (ier != FINUFFT_ERR_NTRANS_NOTVALID) {
     printf("2d1many ndata=0:\twrong err code %d\n", ier);
@@ -408,7 +409,7 @@ int main(int argc, char *argv[]) {
     return 1;
   }
   for (int k = 0; k < NN * ndata; ++k)
-    Fm[k] = sin((FLT)0.7 * k) + IMA * cos((FLT)0.3 * k); // reset Fm for t2
+    Fm[k] = std::sin((FLT)0.7 * k) + IMA * std::cos((FLT)0.3 * k); // reset Fm for t2
   ier = FINUFFT2D2MANY(0, M, x, x, cm, +1, 0, N, N, Fm, &opts);
   if (ier != FINUFFT_ERR_NTRANS_NOTVALID) {
     printf("2d2many ndata=0:\twrong err code %d\n", ier);
@@ -505,7 +506,7 @@ int main(int argc, char *argv[]) {
     return 1;
   }
   for (int k = 0; k < NN; ++k)
-    F[k] = sin((FLT)0.8 * k) - IMA * cos((FLT)0.3 * k); // set F for t2
+    F[k] = std::sin((FLT)0.8 * k) - IMA * std::cos((FLT)0.3 * k); // set F for t2
   ier = FINUFFT3D2(M, x, x, x, c, +1, 0, N, N, N, F, &opts);
   if (ier != FINUFFT_WARN_EPS_TOO_SMALL) {
     printf("3d2 tol=0:\twrong err code %d\n", ier);
@@ -541,7 +542,7 @@ int main(int argc, char *argv[]) {
     return ier;
   }
   for (int j = 0; j < M; ++j)
-    c[j] = sin((FLT)1.2 * j) - IMA * cos((FLT)0.8 * j); // reset c for t3
+    c[j] = std::sin((FLT)1.2 * j) - IMA * std::cos((FLT)0.8 * j); // reset c for t3
   ier = FINUFFT3D3(M, x, x, x, c, +1, 0, N, s, s, s, F, &opts);
   if (ier != FINUFFT_WARN_EPS_TOO_SMALL) {
     printf("3d3 tol=0:\twrong err code %d\n", ier);
@@ -564,14 +565,15 @@ int main(int argc, char *argv[]) {
     printf("3d3 M=nk=1:\tier=%d\n", ier);
     return ier;
   }
-  for (int k = 0; k < N; ++k) shuge[k] = pow(huge, 1. / 3) * s[k]; // less huge coords
+  for (int k = 0; k < N; ++k)
+    shuge[k] = std::pow(huge, 1. / 3) * s[k]; // less huge coords
   ier = FINUFFT3D3(M, x, x, x, c, +1, acc, N, shuge, shuge, shuge, F, &opts);
   if (ier == 0) { // any nonzero code accepted here
     printf("3d3 XK prod too big:\twrong error code %d\n", ier);
     return 1;
   }
   for (int j = 0; j < M * ndata; ++j)
-    cm[j] = sin(-(FLT)1.2 * j) + IMA * cos((FLT)1.1 * j); // reset cm for 3d1many
+    cm[j] = std::sin(-(FLT)1.2 * j) + IMA * std::cos((FLT)1.1 * j); // reset cm for 3d1many
   ier = FINUFFT3D1MANY(0, M, x, x, x, cm, +1, 0, N, N, N, Fm, &opts);
   if (ier != FINUFFT_ERR_NTRANS_NOTVALID) {
     printf("3d1many ndata=0:\twrong err code %d\n", ier);
@@ -609,7 +611,7 @@ int main(int argc, char *argv[]) {
     return 1;
   }
   for (int k = 0; k < NN * ndata; ++k)
-    Fm[k] = sin((FLT)0.6 * k) - IMA * cos((FLT)0.3 * k); // reset Fm for t2
+    Fm[k] = std::sin((FLT)0.6 * k) - IMA * std::cos((FLT)0.3 * k); // reset Fm for t2
   ier = FINUFFT3D2MANY(0, M, x, x, x, cm, +1, 0, N, N, N, Fm, &opts);
   if (ier != FINUFFT_ERR_NTRANS_NOTVALID) {
     printf("3d2many ndata=0:\twrong err code %d\n", ier);
diff --git a/test/finufft1d_test.cpp b/test/finufft1d_test.cpp
index 0c38bb9cc..c2c102c0e 100644
--- a/test/finufft1d_test.cpp
+++ b/test/finufft1d_test.cpp
@@ -3,7 +3,12 @@
 #include "finufft/finufft_utils.hpp"
 #include "utils/dirft1d.hpp"
 #include "utils/norms.hpp"
-using namespace std;
+
+#include <algorithm>
+#include <cmath>
+#include <iomanip>
+#include <iostream>
+
 using namespace finufft::utils;
 
 const char *help[] = {
@@ -41,7 +46,7 @@ int main(int argc, char *argv[]) {
   }
   if (argc > 7) sscanf(argv[7], "%lf", &errfail);
 
-  cout << scientific << setprecision(15);
+  std::cout << std::scientific << std::setprecision(15);
 
   FLT *x = (FLT *)malloc(sizeof(FLT) * M); // NU pts
   CPX *c = (CPX *)malloc(sizeof(CPX) * M); // strengths
@@ -62,7 +67,7 @@ int main(int argc, char *argv[]) {
   CNTime timer;
   timer.start();
   int ier = FINUFFT1D1(M, x, c, isign, tol, N, F, &opts);
-  // for (int j=0;j<N;++j) cout<<F[j]<<endl;
+  // for (int j=0;j<N;++j) std::cout<<F[j]<<endl;
   double t = timer.elapsedsec();
   if (ier > 1) {
     printf("error (ier=%d)!\n", ier);
@@ -82,17 +87,17 @@ int main(int argc, char *argv[]) {
     Ftr += real(c[j]) * co - imag(c[j]) * si; // cpx arith by hand
     Fti += imag(c[j]) * co + real(c[j]) * si;
   }
-  err = abs(Ftr + IMA * Fti - F[N / 2 + nt]) / infnorm(N, F);
+  err = std::abs(Ftr + IMA * Fti - F[N / 2 + nt]) / infnorm(N, F);
   printf("\tone mode: rel err in F[%lld] is %.3g\n", (long long)nt, err);
   if (((int64_t)M) * N <= TEST_BIGPROB) { // also full direct eval
     CPX *Ft = (CPX *)malloc(sizeof(CPX) * N);
     dirft1d1(M, x, c, isign, N, Ft);
     err    = relerrtwonorm(N, Ft, F);
-    errmax = max(err, errmax);
+    errmax = std::max(err, errmax);
     printf("\tdirft1d: rel l2-err of result F is %.3g\n", err);
     free(Ft);
   } else
-    errmax = max(err, errmax);
+    errmax = std::max(err, errmax);
 
   printf("test 1d type 2:\n"); // -------------- type 2
 #pragma omp parallel
@@ -117,19 +122,19 @@ int main(int argc, char *argv[]) {
   BIGINT m = 0, k0 = N / 2; // index shift in fk's = mag of most neg freq
   // #pragma omp parallel for schedule(static,TEST_RANDCHUNK) reduction(cmplxadd:ct)
   for (BIGINT m1 = -k0; m1 <= (N - 1) / 2; ++m1)
-    ct += F[m++] * exp(IMA * ((FLT)(isign * m1)) * x[jt]); // crude direct
-  err = abs(ct - c[jt]) / infnorm(M, c);
+    ct += F[m++] * std::exp(IMA * ((FLT)(isign * m1)) * x[jt]); // crude direct
+  err = std::abs(ct - c[jt]) / infnorm(M, c);
   printf("\tone targ: rel err in c[%lld] is %.3g\n", (long long)jt, err);
   if (((int64_t)M) * N <= TEST_BIGPROB) { // also full direct eval
     CPX *ct = (CPX *)malloc(sizeof(CPX) * M);
     dirft1d2(M, x, ct, isign, N, F);
     err    = relerrtwonorm(M, ct, c);
-    errmax = max(err, errmax);
+    errmax = std::max(err, errmax);
     printf("\tdirft1d: rel l2-err of result c is %.3g\n", err);
     // cout<<"c/ct:\n"; for (int j=0;j<M;++j) cout<<c[j]/ct[j]<<endl;
     free(ct);
   } else
-    errmax = max(err, errmax);
+    errmax = std::max(err, errmax);
 
   printf("test 1d type 3:\n"); // -------------- type 3
                                // reuse the strengths c, interpret N as number of targs:
@@ -167,24 +172,24 @@ int main(int argc, char *argv[]) {
     Ftr += real(c[j]) * co - imag(c[j]) * si; // cpx arith by hand
     Fti += imag(c[j]) * co + real(c[j]) * si;
   }
-  err = abs(Ftr + IMA * Fti - F[kt]) / infnorm(N, F);
+  err = std::abs(Ftr + IMA * Fti - F[kt]) / infnorm(N, F);
   printf("\tone targ: rel err in F[%lld] is %.3g\n", (long long)kt, err);
   if (((int64_t)M) * N <= TEST_BIGPROB) { // also full direct eval
     CPX *Ft = (CPX *)malloc(sizeof(CPX) * N);
     dirft1d3(M, x, c, isign, N, s, Ft);   // writes to F
     err    = relerrtwonorm(N, Ft, F);
-    errmax = max(err, errmax);
+    errmax = std::max(err, errmax);
     printf("\tdirft1d: rel l2-err of result F is %.3g\n", err);
     // cout<<"s, F, Ft:\n"; for (int k=0;k<N;++k) cout<<s[k]<<"
     // "<<F[k]<<"\t"<<Ft[k]<<"\t"<<F[k]/Ft[k]<<endl;
     free(Ft);
   } else
-    errmax = max(err, errmax);
+    errmax = std::max(err, errmax);
   free(x);
   free(c);
   free(F);
   free(s);
-  if (isnan(errmax) || (errmax > errfail)) {
+  if (std::isnan(errmax) || (errmax > errfail)) {
     printf("\tfailed! err %.3g > errfail %.3g\n", errmax, errfail);
     return 1;
   } else
diff --git a/test/finufft1dmany_test.cpp b/test/finufft1dmany_test.cpp
index 1786766bf..aeb80f81e 100644
--- a/test/finufft1dmany_test.cpp
+++ b/test/finufft1dmany_test.cpp
@@ -3,7 +3,11 @@
 #include "finufft/finufft_utils.hpp"
 #include "utils/dirft1d.hpp"
 #include "utils/norms.hpp"
-using namespace std;
+
+#include <algorithm>
+#include <cmath>
+#include <iomanip>
+#include <iostream>
 using namespace finufft::utils;
 
 const char *help[] = {
@@ -47,7 +51,7 @@ int main(int argc, char *argv[]) {
   }
   if (argc > 10) sscanf(argv[10], "%lf", &errfail);
 
-  cout << scientific << setprecision(15);
+  std::cout << std::scientific << std::setprecision(15);
 
   FLT *x = (FLT *)malloc(sizeof(FLT) * M);           // NU pts x coords
   CPX *c = (CPX *)malloc(sizeof(CPX) * M * ntransf); // strengths
@@ -82,9 +86,9 @@ int main(int argc, char *argv[]) {
   BIGINT nt1 = (BIGINT)(0.37 * N); // choose some mode index to check
   CPX Ft = CPX(0, 0), J = IMA * (FLT)isign;
   for (BIGINT j = 0; j < M; ++j)
-    Ft += c[j + i * M] * exp(J * (nt1 * x[j])); // crude direct
+    Ft += c[j + i * M] * std::exp(J * (nt1 * x[j])); // crude direct
   BIGINT it = N / 2 + nt1;                      // index in complex F as 1d array
-  err       = abs(Ft - F[it + i * N]) / infnorm(N, F + i * N);
+  err       = std::abs(Ft - F[it + i * N]) / infnorm(N, F + i * N);
   printf("\tone mode: rel err in F[%lld] of trans#%d is %.3g\n", (long long)nt1, i, err);
 
   // compare the result with FINUFFT1D1
@@ -113,8 +117,8 @@ int main(int argc, char *argv[]) {
   // Check consistency (worst over the ntransf)
   double maxerror = 0.0;
   for (int k = 0; k < ntransf; ++k)
-    maxerror = max(maxerror, (double)relerrtwonorm(N, F_1d1 + k * N, F + k * N));
-  errmax = max(maxerror, errmax);
+    maxerror = std::max(maxerror, (double)relerrtwonorm(N, F_1d1 + k * N, F + k * N));
+  errmax = std::max(maxerror, errmax);
   printf("\tconsistency check: sup ( ||f_many-f||_2 / ||f||_2  ) =  %.3g\n", maxerror);
   free(F_1d1);
 
@@ -143,8 +147,8 @@ int main(int argc, char *argv[]) {
   BIGINT m = 0, k0 = N / 2; // index shift in fk's = mag of most neg freq
   // #pragma omp parallel for schedule(static,TEST_RANDCHUNK) reduction(cmplxadd:ct)
   for (BIGINT m1 = -k0; m1 <= (N - 1) / 2; ++m1)
-    ct += F[i * N + m++] * exp(IMA * ((FLT)(isign * m1)) * x[jt]); // crude direct
-  err = abs(ct - c[jt + i * M]) / infnorm(M, c + i * M);
+    ct += F[i * N + m++] * std::exp(IMA * ((FLT)(isign * m1)) * x[jt]); // crude direct
+  err = std::abs(ct - c[jt + i * M]) / infnorm(M, c + i * M);
   printf("\tone targ: rel err in c[%lld] of trans#%d is %.3g\n", (long long)jt, i, err);
 
   // check against single calls to FINUFFT1D2...
@@ -167,8 +171,8 @@ int main(int argc, char *argv[]) {
 
   maxerror = 0.0; // worst error over the ntransf
   for (int k = 0; k < ntransf; ++k)
-    maxerror = max(maxerror, (double)relerrtwonorm(M, c_1d2 + k * M, c + k * M));
-  errmax = max(maxerror, errmax);
+    maxerror = std::max(maxerror, (double)relerrtwonorm(M, c_1d2 + k * M, c + k * M));
+  errmax = std::max(maxerror, errmax);
   printf("\tconsistency check: sup ( ||c_many-c||_2 / ||c||_2 ) =  %.3g\n", maxerror);
   free(c_1d2);
 
@@ -208,8 +212,8 @@ int main(int argc, char *argv[]) {
   Ft        = CPX(0, 0);
   // #pragma omp parallel for schedule(static,TEST_RANDCHUNK) reduction(cmplxadd:Ft)
   for (BIGINT j = 0; j < M; ++j)
-    Ft += c[j + i * M] * exp(IMA * (FLT)isign * s[kt] * x[j]);
-  err = abs(Ft - F[kt + i * N]) / infnorm(N, F + i * N);
+    Ft += c[j + i * M] * std::exp(IMA * (FLT)isign * s[kt] * x[j]);
+  err = std::abs(Ft - F[kt + i * N]) / infnorm(N, F + i * N);
   printf("\tone targ: rel err in F[%lld] of trans#%d is %.3g\n", (long long)kt, i, err);
 
   // compare the result with single calls to FINUFFT1D3...
@@ -232,15 +236,15 @@ int main(int argc, char *argv[]) {
 
   maxerror = 0.0; // worst error over the ntransf
   for (int k = 0; k < ntransf; ++k)
-    maxerror = max(maxerror, (double)relerrtwonorm(N, f_1d3 + k * N, F + k * N));
-  errmax = max(maxerror, errmax);
+    maxerror = std::max(maxerror, (double)relerrtwonorm(N, f_1d3 + k * N, F + k * N));
+  errmax = std::max(maxerror, errmax);
   printf("\tconsistency check: sup ( ||f_many-f||_2 / ||f||_2 ) =  %.3g\n", maxerror);
   free(f_1d3);
   free(x);
   free(s);
   free(c);
   free(F);
-  if (isnan(errmax) || (errmax > errfail)) {
+  if (std::isnan(errmax) || (errmax > errfail)) {
     printf("\tfailed! err %.3g > errfail %.3g\n", errmax, errfail);
     return 1;
   } else
diff --git a/test/finufft2d_test.cpp b/test/finufft2d_test.cpp
index e0dbbde05..07fe394c6 100644
--- a/test/finufft2d_test.cpp
+++ b/test/finufft2d_test.cpp
@@ -4,7 +4,10 @@
 #include "utils/dirft2d.hpp"
 #include "utils/norms.hpp"
 
-using namespace std;
+#include <algorithm>
+#include <cmath>
+#include <iomanip>
+#include <iostream>
 using namespace finufft::utils;
 
 const char *help[] = {"Tester for FINUFFT in 2d, all 3 types, either precision.",
@@ -44,7 +47,7 @@ int main(int argc, char *argv[]) {
   }
   if (argc > 8) sscanf(argv[8], "%lf", &errfail);
 
-  cout << scientific << setprecision(15);
+  std::cout << std::scientific << std::setprecision(15);
   BIGINT N = N1 * N2;
 
   FLT *x = (FLT *)malloc(sizeof(FLT) * M); // NU pts x coords
@@ -84,18 +87,18 @@ int main(int argc, char *argv[]) {
     Fti += imag(c[j]) * co + real(c[j]) * si;
   }
   BIGINT it = N1 / 2 + nt1 + N1 * (N2 / 2 + nt2); // index in complex F as 1d array
-  err       = abs(Ftr + IMA * Fti - F[it]) / infnorm(N, F);
+  err       = std::abs(Ftr + IMA * Fti - F[it]) / infnorm(N, F);
   printf("\tone mode: rel err in F[%lld,%lld] is %.3g\n", (long long)nt1, (long long)nt2,
          err);
   if ((int64_t)M * N <= TEST_BIGPROB) { // also check vs full direct eval
     CPX *Ft = (CPX *)malloc(sizeof(CPX) * N);
     dirft2d1(M, x, y, c, isign, N1, N2, Ft);
     err    = relerrtwonorm(N, Ft, F);
-    errmax = max(err, errmax);
+    errmax = std::max(err, errmax);
     printf("\tdirft2d: rel l2-err of result F is %.3g\n", err);
     free(Ft);
   } else
-    errmax = max(err, errmax);
+    errmax = std::max(err, errmax);
 
   printf("test 2d type 2:\n"); // -------------- type 2
 #pragma omp parallel
@@ -119,20 +122,21 @@ int main(int argc, char *argv[]) {
   BIGINT m  = 0;
   for (BIGINT m2 = -(N2 / 2); m2 <= (N2 - 1) / 2; ++m2) // loop in correct order over F
     for (BIGINT m1 = -(N1 / 2); m1 <= (N1 - 1) / 2; ++m1)
-      ct += F[m++] * exp(IMA * (FLT)isign * (m1 * x[jt] + m2 * y[jt])); // crude
+      ct += F[m++] *
+            std::exp(IMA * (FLT)isign * (m1 * x[jt] + m2 * y[jt])); // crude
                                                                         // direct
-  err = abs(ct - c[jt]) / infnorm(M, c);
+  err = std::abs(ct - c[jt]) / infnorm(M, c);
   printf("\tone targ: rel err in c[%lld] is %.3g\n", (long long)jt, err);
   if ((int64_t)M * N <= TEST_BIGPROB) { // also full direct eval
     CPX *ct = (CPX *)malloc(sizeof(CPX) * M);
     dirft2d2(M, x, y, ct, isign, N1, N2, F);
     err    = relerrtwonorm(M, ct, c);
-    errmax = max(err, errmax);
+    errmax = std::max(err, errmax);
     printf("\tdirft2d: rel l2-err of result c is %.3g\n", err);
     // cout<<"c,ct:\n"; for (int j=0;j<M;++j) cout<<c[j]<<"\t"<<ct[j]<<endl;
     free(ct);
   } else
-    errmax = max(err, errmax);
+    errmax = std::max(err, errmax);
 
   printf("test 2d type 3:\n"); // -------------- type 3
                                // reuse the strengths c, interpret N as number of targs:
@@ -177,19 +181,19 @@ int main(int argc, char *argv[]) {
     Ftr += real(c[j]) * co - imag(c[j]) * si; // cpx arith by hand
     Fti += imag(c[j]) * co + real(c[j]) * si;
   }
-  err = abs(Ftr + IMA * Fti - F[kt]) / infnorm(N, F);
+  err = std::abs(Ftr + IMA * Fti - F[kt]) / infnorm(N, F);
   printf("\tone targ: rel err in F[%lld] is %.3g\n", (long long)kt, err);
   if (((int64_t)M) * N <= TEST_BIGPROB) {     // also full direct eval
     CPX *Ft = (CPX *)malloc(sizeof(CPX) * N);
     dirft2d3(M, x, y, c, isign, N, s, t, Ft); // writes to F
     err    = relerrtwonorm(N, Ft, F);
-    errmax = max(err, errmax);
+    errmax = std::max(err, errmax);
     printf("\tdirft2d: rel l2-err of result F is %.3g\n", err);
     // cout<<"s t, F, Ft, F/Ft:\n"; for (int k=0;k<N;++k) cout<<s[k]<<" "<<t[k]<<",
     // "<<F[k]<<",\t"<<Ft[k]<<",\t"<<F[k]/Ft[k]<<endl;
     free(Ft);
   } else
-    errmax = max(err, errmax);
+    errmax = std::max(err, errmax);
 
   free(x);
   free(y);
@@ -197,7 +201,7 @@ int main(int argc, char *argv[]) {
   free(F);
   free(s);
   free(t);
-  if (isnan(errmax) || (errmax > errfail)) {
+  if (std::isnan(errmax) || (errmax > errfail)) {
     printf("\tfailed! err %.3g > errfail %.3g\n", errmax, errfail);
     return 1;
   } else
diff --git a/test/finufft2dmany_test.cpp b/test/finufft2dmany_test.cpp
index 50ee3eb33..cb532fa62 100644
--- a/test/finufft2dmany_test.cpp
+++ b/test/finufft2dmany_test.cpp
@@ -3,7 +3,11 @@
 #include "finufft/finufft_utils.hpp"
 #include "utils/dirft2d.hpp"
 #include "utils/norms.hpp"
-using namespace std;
+
+#include <algorithm>
+#include <cmath>
+#include <iomanip>
+#include <iostream>
 using namespace finufft::utils;
 
 const char *help[] = {
@@ -49,7 +53,7 @@ int main(int argc, char *argv[]) {
   }
   if (argc > 11) sscanf(argv[11], "%lf", &errfail);
 
-  cout << scientific << setprecision(15);
+  std::cout << std::scientific << std::setprecision(15);
   BIGINT N = N1 * N2;
 
   FLT *x = (FLT *)malloc(sizeof(FLT) * M);           // NU pts x coords
@@ -88,9 +92,9 @@ int main(int argc, char *argv[]) {
                                                                // to check
   CPX Ft = CPX(0, 0), J = IMA * (FLT)isign;
   for (BIGINT j = 0; j < M; ++j)
-    Ft += c[j + i * M] * exp(J * (nt1 * x[j] + nt2 * y[j])); // crude direct
+    Ft += c[j + i * M] * std::exp(J * (nt1 * x[j] + nt2 * y[j])); // crude direct
   BIGINT it = N1 / 2 + nt1 + N1 * (N2 / 2 + nt2); // index in complex F as 1d array
-  err       = abs(Ft - F[it + i * N]) / infnorm(N, F + i * N);
+  err       = std::abs(Ft - F[it + i * N]) / infnorm(N, F + i * N);
   printf("\tone mode: rel err in F[%lld,%lld] of trans#%d is %.3g\n", (long long)nt1,
          (long long)nt2, i, err);
 
@@ -121,8 +125,8 @@ int main(int argc, char *argv[]) {
   // Check consistency (worst over the ntransf)
   double maxerror = 0.0;
   for (int k = 0; k < ntransf; ++k)
-    maxerror = max(maxerror, (double)relerrtwonorm(N, F_2d1 + k * N, F + k * N));
-  errmax = max(maxerror, errmax);
+    maxerror = std::max(maxerror, (double)relerrtwonorm(N, F_2d1 + k * N, F + k * N));
+  errmax = std::max(maxerror, errmax);
   printf("\tconsistency check: sup ( ||f_many-f||_2 / ||f||_2  ) =  %.3g\n", maxerror);
   free(F_2d1);
 
@@ -153,8 +157,8 @@ int main(int argc, char *argv[]) {
   BIGINT m  = 0;
   for (BIGINT m2 = -(N2 / 2); m2 <= (N2 - 1) / 2; ++m2) // loop in correct order over F
     for (BIGINT m1 = -(N1 / 2); m1 <= (N1 - 1) / 2; ++m1)
-      ct += F[i * N + m++] * exp(J * (m1 * x[jt] + m2 * y[jt])); // crude direct
-  err = abs(ct - c[jt + i * M]) / infnorm(M, c + i * M);
+      ct += F[i * N + m++] * std::exp(J * (m1 * x[jt] + m2 * y[jt])); // crude direct
+  err = std::abs(ct - c[jt + i * M]) / infnorm(M, c + i * M);
   printf("\tone targ: rel err in c[%lld] of trans#%d is %.3g\n", (long long)jt, i, err);
 
   // compare the result with single calls to FINUFFT2D2...
@@ -176,8 +180,8 @@ int main(int argc, char *argv[]) {
 
   maxerror = 0.0; // worst error over the ntransf
   for (int k = 0; k < ntransf; ++k)
-    maxerror = max(maxerror, (double)relerrtwonorm(M, c_2d2 + k * M, c + k * M));
-  errmax = max(maxerror, errmax);
+    maxerror = std::max(maxerror, (double)relerrtwonorm(M, c_2d2 + k * M, c + k * M));
+  errmax = std::max(maxerror, errmax);
   printf("\tconsistency check: sup ( ||c_many-c||_2 / ||c||_2 ) =  %.3g\n", maxerror);
   free(c_2d2);
 
@@ -225,8 +229,8 @@ int main(int argc, char *argv[]) {
   BIGINT kt = N / 4;       // check arbitrary choice of one targ pt
   Ft        = CPX(0, 0);
   for (BIGINT j = 0; j < M; ++j)
-    Ft += c[i * M + j] * exp(J * (s_freq[kt] * x[j] + t_freq[kt] * y[j]));
-  err = abs(Ft - F[kt + i * N]) / infnorm(N, F + i * N);
+    Ft += c[i * M + j] * std::exp(J * (s_freq[kt] * x[j] + t_freq[kt] * y[j]));
+  err = std::abs(Ft - F[kt + i * N]) / infnorm(N, F + i * N);
   printf("\tone targ: rel err in F[%lld] of trans#%d is %.3g\n", (long long)kt, i, err);
 
   // compare the result with FINUFFT2D3...
@@ -250,8 +254,8 @@ int main(int argc, char *argv[]) {
   // check against the old
   maxerror = 0.0; // worst error over the ntransf
   for (int k = 0; k < ntransf; ++k)
-    maxerror = max(maxerror, (double)relerrtwonorm(N, f_2d3 + k * N, F + k * N));
-  errmax = max(maxerror, errmax);
+    maxerror = std::max(maxerror, (double)relerrtwonorm(N, f_2d3 + k * N, F + k * N));
+  errmax = std::max(maxerror, errmax);
   printf("\tconsistency check: sup ( ||f_many-f||_2 / ||f||_2 ) =  %.3g\n", maxerror);
   free(f_2d3);
 
@@ -261,7 +265,7 @@ int main(int argc, char *argv[]) {
   free(F);
   free(s_freq);
   free(t_freq);
-  if (isnan(errmax) || (errmax > errfail)) {
+  if (std::isnan(errmax) || (errmax > errfail)) {
     printf("\tfailed! err %.3g > errfail %.3g\n", errmax, errfail);
     return 1;
   } else
diff --git a/test/finufft3d_test.cpp b/test/finufft3d_test.cpp
index 167fcc15b..84366ddd4 100644
--- a/test/finufft3d_test.cpp
+++ b/test/finufft3d_test.cpp
@@ -3,7 +3,11 @@
 #include "finufft/finufft_utils.hpp"
 #include "utils/dirft3d.hpp"
 #include "utils/norms.hpp"
-using namespace std;
+
+#include <algorithm>
+#include <cmath>
+#include <iomanip>
+#include <iostream>
 using namespace finufft::utils;
 
 const char *help[] = {"Tester for FINUFFT in 3d, all 3 types, either precision.",
@@ -47,7 +51,7 @@ int main(int argc, char *argv[]) {
   }
   if (argc > 9) sscanf(argv[9], "%lf", &errfail);
 
-  cout << scientific << setprecision(15);
+  std::cout << std::scientific << std::setprecision(15);
   BIGINT N = N1 * N2 * N3;
 
   FLT *x = (FLT *)malloc(sizeof(FLT) * M); // NU pts x coords
@@ -83,25 +87,25 @@ int main(int argc, char *argv[]) {
          nt3 = (BIGINT)(-0.39 * N3); // choose mode to check
   FLT Ftr = 0, Fti = 0;              // crude direct...
 #pragma omp parallel for schedule(static, TEST_RANDCHUNK) reduction(+ : Ftr, Fti)
-  for (BIGINT j = 0; j < M; ++j) {   // Ft += c[j] * exp(J*(nt1*x[j]+nt2*y[j]+nt3*z[j]))
+  for (BIGINT j = 0; j < M; ++j) {   // Ft += c[j] * std::exp(J*(nt1*x[j]+nt2*y[j]+nt3*z[j]))
     FLT w = (FLT)isign * (nt1 * x[j] + nt2 * y[j] + nt3 * z[j]), co = cos(w), si = sin(w);
     Ftr += real(c[j]) * co - imag(c[j]) * si; // cpx arith by hand
     Fti += imag(c[j]) * co + real(c[j]) * si;
   }
   // index in complex F as 1d array...
   BIGINT it = N1 / 2 + nt1 + N1 * (N2 / 2 + nt2) + N1 * N2 * (N3 / 2 + nt3);
-  err       = abs(Ftr + IMA * Fti - F[it]) / infnorm(N, F);
+  err       = std::abs(Ftr + IMA * Fti - F[it]) / infnorm(N, F);
   printf("\tone mode: rel err in F[%lld,%lld,%lld] is %.3g\n", (long long)nt1,
          (long long)nt2, (long long)nt3, err);
   if ((int64_t)M * N <= TEST_BIGPROB) { // also check vs full direct eval
     CPX *Ft = (CPX *)malloc(sizeof(CPX) * N);
     dirft3d1(M, x, y, z, c, isign, N1, N2, N3, Ft);
     err    = relerrtwonorm(N, Ft, F);
-    errmax = max(err, errmax);
+    errmax = std::max(err, errmax);
     printf("\tdirft3d: rel l2-err of result F is %.3g\n", err);
     free(Ft);
   } else
-    errmax = max(err, errmax);
+    errmax = std::max(err, errmax);
 
   printf("test 3d type 2:\n"); // -------------- type 2
 #pragma omp parallel
@@ -126,18 +130,18 @@ int main(int argc, char *argv[]) {
   for (BIGINT m3 = -(N3 / 2); m3 <= (N3 - 1) / 2; ++m3) // loop in F order
     for (BIGINT m2 = -(N2 / 2); m2 <= (N2 - 1) / 2; ++m2)
       for (BIGINT m1 = -(N1 / 2); m1 <= (N1 - 1) / 2; ++m1)
-        ct += F[m++] * exp(IMA * (FLT)isign * (m1 * x[jt] + m2 * y[jt] + m3 * z[jt]));
-  err = abs(ct - c[jt]) / infnorm(M, c);
+        ct += F[m++] * std::exp(IMA * (FLT)isign * (m1 * x[jt] + m2 * y[jt] + m3 * z[jt]));
+  err = std::abs(ct - c[jt]) / infnorm(M, c);
   printf("\tone targ: rel err in c[%lld] is %.3g\n", (long long)jt, err);
   if ((int64_t)M * N <= TEST_BIGPROB) { // also full direct eval
     CPX *ct = (CPX *)malloc(sizeof(CPX) * M);
     dirft3d2(M, x, y, z, ct, isign, N1, N2, N3, F);
     err    = relerrtwonorm(M, ct, c);
-    errmax = max(err, errmax);
+    errmax = std::max(err, errmax);
     printf("\tdirft3d: rel l2-err of result c is %.3g\n", err);
     free(ct);
   } else
-    errmax = max(err, errmax);
+    errmax = std::max(err, errmax);
 
   printf("test 3d type 3:\n"); // -------------- type 3
                                // reuse the strengths c, interpret N as number of targs:
@@ -187,19 +191,19 @@ int main(int argc, char *argv[]) {
     Ftr += real(c[j]) * co - imag(c[j]) * si; // cpx arith by hand
     Fti += imag(c[j]) * co + real(c[j]) * si;
   }
-  err = abs(Ftr + IMA * Fti - F[kt]) / infnorm(N, F);
+  err = std::abs(Ftr + IMA * Fti - F[kt]) / infnorm(N, F);
   printf("\tone targ: rel err in F[%lld] is %.3g\n", (long long)kt, err);
   if (((int64_t)M) * N <= TEST_BIGPROB) {           // also full direct eval
     CPX *Ft = (CPX *)malloc(sizeof(CPX) * N);
     dirft3d3(M, x, y, z, c, isign, N, s, t, u, Ft); // writes to F
     err    = relerrtwonorm(N, Ft, F);
-    errmax = max(err, errmax);
+    errmax = std::max(err, errmax);
     printf("\tdirft3d: rel l2-err of result F is %.3g\n", err);
     // cout<<"s t u, F, Ft, F/Ft:\n"; for (int k=0;k<N;++k) cout<<s[k]<<" "<<t[k]<<"
     // "<<u[k]<<", "<<F[k]<<",\t"<<Ft[k]<<",\t"<<F[k]/Ft[k]<<endl;
     free(Ft);
   } else
-    errmax = max(err, errmax);
+    errmax = std::max(err, errmax);
 
   free(x);
   free(y);
@@ -209,7 +213,7 @@ int main(int argc, char *argv[]) {
   free(s);
   free(t);
   free(u);
-  if (isnan(errmax) || (errmax > errfail)) {
+  if (std::isnan(errmax) || (errmax > errfail)) {
     printf("\tfailed! err %.3g > errfail %.3g\n", errmax, errfail);
     return 1;
   } else
diff --git a/test/finufft3dkernel_test.cpp b/test/finufft3dkernel_test.cpp
index 87276261b..3a951b545 100644
--- a/test/finufft3dkernel_test.cpp
+++ b/test/finufft3dkernel_test.cpp
@@ -4,7 +4,9 @@
 #include "utils/dirft3d.hpp"
 #include "utils/norms.hpp"
 
-using namespace std;
+#include <algorithm>
+#include <iomanip>
+#include <iostream>
 using namespace finufft::utils;
 
 const char *help[] = {
@@ -63,7 +65,7 @@ int main(int argc, char *argv[]) {
   opts0.spread_kerevalmeth = 0;
   opts1.spread_kerevalmeth = 1;
 
-  cout << scientific << setprecision(15);
+  std::cout << std::scientific << std::setprecision(15);
   const BIGINT N = N1 * N2 * N3;
 
   std::vector<FLT> x(M);         // NU pts x coords
@@ -110,7 +112,7 @@ int main(int argc, char *argv[]) {
            (long long)M, (long long)N1, (long long)N2, (long long)N3, ti, M / ti);
 
   err    = relerrtwonorm(N, F0.data(), F1.data());
-  errmax = max(err, errmax);
+  errmax = std::max(err, errmax);
   printf("\ttype 1 rel l2-err in F is %.3g\n", err);
   // copy F0 to F1 so that we can test type 2
   F1 = F0;
@@ -178,7 +180,7 @@ int main(int argc, char *argv[]) {
     printf("\t%lld NU to %lld NU in %.3g s         \t%.3g tot NU pts/s\n", (long long)M,
            (long long)N, ti, (M + N) / ti);
   err    = relerrtwonorm(N, F0.data(), F1.data());
-  errmax = max(err, errmax);
+  errmax = std::max(err, errmax);
   printf("\ttype 3 rel l2-err in F is %.3g\n", err);
   // return 1 if any error exceeds tol
   // or return finufft error code if it is not 0
diff --git a/test/finufft3dmany_test.cpp b/test/finufft3dmany_test.cpp
index 2b8f428f6..60adb4d36 100644
--- a/test/finufft3dmany_test.cpp
+++ b/test/finufft3dmany_test.cpp
@@ -4,7 +4,10 @@
 #include "utils/dirft3d.hpp"
 #include "utils/norms.hpp"
 
-using namespace std;
+#include <algorithm>
+#include <cmath>
+#include <iomanip>
+#include <iostream>
 using namespace finufft::utils;
 
 const char *help[] = {
@@ -52,7 +55,7 @@ int main(int argc, char *argv[]) {
   }
   if (argc > 12) sscanf(argv[12], "%lf", &errfail);
 
-  cout << scientific << setprecision(15);
+  std::cout << std::scientific << std::setprecision(15);
   BIGINT N = N1 * N2 * N3;
 
   FLT *x = (FLT *)malloc(sizeof(FLT) * M);           // NU pts x coords
@@ -94,13 +97,13 @@ int main(int argc, char *argv[]) {
          nt3 = (BIGINT)(-0.39 * N3); // choose some mode index to check
   CPX Ft = CPX(0, 0), J = IMA * (FLT)isign;
   for (BIGINT j = 0; j < M; ++j)
-    Ft += c[j + i * M] * exp(J * (nt1 * x[j] + nt2 * y[j] + nt3 * z[j]));    // crude
+    Ft += c[j + i * M] * std::exp(J * (nt1 * x[j] + nt2 * y[j] + nt3 * z[j]));    // crude
                                                                              // direct
   BIGINT it = N1 / 2 + nt1 + N1 * (N2 / 2 + nt2) + N1 * N2 * (N3 / 2 + nt3); // index in
                                                                              // complex
                                                                              // F as 1d
                                                                              // array
-  err = abs(Ft - F[it + i * N]) / infnorm(N, F + i * N);
+  err = std::abs(Ft - F[it + i * N]) / infnorm(N, F + i * N);
   printf("\tone mode: rel err in F[%lld,%lld,%lld] of trans#%d is %.3g\n", (long long)nt1,
          (long long)nt2, (long long)nt3, i, err);
 
@@ -132,8 +135,8 @@ int main(int argc, char *argv[]) {
   // Check accuracy (worst over the ntransf)
   double maxerror = 0.0;
   for (int k = 0; k < ntransf; ++k)
-    maxerror = max(maxerror, (double)relerrtwonorm(N, F_3d1 + k * N, F + k * N));
-  errmax = max(maxerror, errmax);
+    maxerror = std::max(maxerror, (double)relerrtwonorm(N, F_3d1 + k * N, F + k * N));
+  errmax = std::max(maxerror, errmax);
   printf("\tconsistency check: sup ( ||f_many-f||_2 / ||f||_2 ) =  %.3g\n", maxerror);
   free(F_3d1);
 
@@ -164,12 +167,12 @@ int main(int argc, char *argv[]) {
     for (BIGINT m2 = -(N2 / 2); m2 <= (N2 - 1) / 2; ++m2) { // loop in correct order
                                                             // over F
       for (BIGINT m1 = -(N1 / 2); m1 <= (N1 - 1) / 2; ++m1) {
-        ct += F[i * N + m++] * exp(J * (m1 * x[jt] + m2 * y[jt] + m3 * z[jt])); // crude
+        ct += F[i * N + m++] * std::exp(J * (m1 * x[jt] + m2 * y[jt] + m3 * z[jt])); // crude
                                                                                 // direct
       }
     }
   }
-  err = abs(ct - c[jt + i * M]) / infnorm(M, c + i * M);
+  err = std::abs(ct - c[jt + i * M]) / infnorm(M, c + i * M);
   printf("\tone targ: rel err in c[%lld] of trans#%d is %.3g\n", (long long)jt, i, err);
 
   finufft_fft_forget_wisdom();
@@ -193,8 +196,8 @@ int main(int argc, char *argv[]) {
 
   maxerror = 0.0; // worst error over the ntransf
   for (int k = 0; k < ntransf; ++k)
-    maxerror = max(maxerror, (double)relerrtwonorm(M, c_3d2 + k * M, c + k * M));
-  errmax = max(maxerror, errmax);
+    maxerror = std::max(maxerror, (double)relerrtwonorm(M, c_3d2 + k * M, c + k * M));
+  errmax = std::max(maxerror, errmax);
   printf("\tconsistency check: sup ( ||c_many-c||_2 / ||c||_2 ) =  %.3g\n", maxerror);
   free(c_3d2);
 
@@ -246,8 +249,8 @@ int main(int argc, char *argv[]) {
   Ft        = CPX(0, 0);
   for (BIGINT j = 0; j < M; ++j)
     Ft += c[i * M + j] *
-          exp(J * (s_freq[kt] * x[j] + t_freq[kt] * y[j] + u_freq[kt] * z[j]));
-  err = abs(Ft - F[kt + i * N]) / infnorm(N, F + i * N);
+          std::exp(J * (s_freq[kt] * x[j] + t_freq[kt] * y[j] + u_freq[kt] * z[j]));
+  err = std::abs(Ft - F[kt + i * N]) / infnorm(N, F + i * N);
   printf("\t one targ: rel err in F[%lld] of trans#%d is %.3g\n", (long long)kt, i, err);
 
   finufft_fft_forget_wisdom();
@@ -271,8 +274,8 @@ int main(int argc, char *argv[]) {
 
   maxerror = 0.0; // worst error over the ntransf
   for (int k = 0; k < ntransf; ++k)
-    maxerror = max(maxerror, (double)relerrtwonorm(N, f_3d3 + k * N, F + k * N));
-  errmax = max(maxerror, errmax);
+    maxerror = std::max(maxerror, (double)relerrtwonorm(N, f_3d3 + k * N, F + k * N));
+  errmax = std::max(maxerror, errmax);
   printf("\tconsistency check: sup ( ||f_many-f||_2 / ||f||_2 ) =  %.3g\n", maxerror);
   free(f_3d3);
 
@@ -284,7 +287,7 @@ int main(int argc, char *argv[]) {
   free(s_freq);
   free(t_freq);
   free(u_freq);
-  if (isnan(errmax) || (errmax > errfail)) {
+  if (std::isnan(errmax) || (errmax > errfail)) {
     printf("\tfailed! err %.3g > errfail %.3g\n", errmax, errfail);
     return 1;
   } else
diff --git a/test/spreadinterp1d_test.cpp b/test/spreadinterp1d_test.cpp
index 4aecc9c88..1ab368c62 100644
--- a/test/spreadinterp1d_test.cpp
+++ b/test/spreadinterp1d_test.cpp
@@ -10,7 +10,10 @@
    Barnett 1/8/25, based on ../examples/spreadinterponly1d and finufft1d_test
 */
 #include <finufft/test_defs.h>
-using namespace std;
+
+#include <algorithm>
+#include <complex>
+#include <vector>
 using namespace finufft::utils;
 
 const char *help[] = {"Tester for FINUFFT in 1d, spread/interp only, either precision.",
@@ -47,9 +50,9 @@ int main(int argc, char *argv[]) {
   }
   if (argc > 7) sscanf(argv[7], "%lf", &errfail);
 
-  vector<FLT> x(M); // NU pts
-  vector<CPX> c(M); // their strengths
-  vector<CPX> F(M); // values on regular I/O grid (not Fourier coeffs for this task!)
+  std::vector<FLT> x(M); // NU pts
+  std::vector<CPX> c(M); // their strengths
+  std::vector<CPX> F(M); // values on regular I/O grid (not Fourier coeffs for this task!)
 
   // first spread M=1 single unit-strength at the origin, to get its total mass...
   x[0]       = 0.0;
@@ -88,12 +91,12 @@ int main(int argc, char *argv[]) {
   for (auto cj : c) csum += cj;
   CPX mass = 0.0; // tot output mass
   for (auto Fk : F) mass += Fk;
-  FLT relerr = abs(mass - kersum * csum) / abs(mass);
+  FLT relerr = std::abs(mass - kersum * csum) / std::abs(mass);
   printf("\trel mass err %.3g\n", relerr);
-  errmax = max(relerr, errmax);
+  errmax = std::max(relerr, errmax);
 
   printf("interp-only test 1d:\n"); // ............................................
-  for (auto &Fk : F) Fk = complex<double>{1.0, 0.0}; // unit grid input
+  for (auto &Fk : F) Fk = std::complex<double>{1.0, 0.0}; // unit grid input
   timer.restart();                                   // F input, c output...
   ier = FINUFFT1D2(M, x.data(), c.data(), unused, tol, N, F.data(), &opts);
   t   = timer.elapsedsec();
@@ -106,10 +109,10 @@ int main(int argc, char *argv[]) {
   csum = 0.0; // tot output
   for (auto cj : c) csum += cj;
   FLT superr = 0.0;
-  for (auto cj : c) superr = max(superr, abs(cj - kersum));
-  FLT relsuperr = superr / abs(kersum);
+  for (auto cj : c) superr = std::max(superr, std::abs(cj - kersum));
+  FLT relsuperr = superr / std::abs(kersum);
   printf("\trel sup err %.3g\n", relsuperr);
-  errmax = max(relsuperr, errmax);
+  errmax = std::max(relsuperr, errmax);
 
   return (errmax > (FLT)errfail);
 }
diff --git a/test/testutils.cpp b/test/testutils.cpp
index 070e5300b..9203688fa 100644
--- a/test/testutils.cpp
+++ b/test/testutils.cpp
@@ -83,11 +83,11 @@ int main(int argc, char *argv[]) {
   }
   constexpr FLT EPSILON = std::numeric_limits<FLT>::epsilon();
   FLT relerr            = 2.0 * EPSILON; // 1 ULP, fine since 1.0 rep exactly
-  if (abs(infnorm(M, &a[0]) - 1.0) > relerr) return 1;
-  if (abs(twonorm(M, &a[0]) - sqrt((FLT)M)) > relerr * sqrt((FLT)M)) return 1;
+  if (std::abs(infnorm(M, &a[0]) - 1.0) > relerr) return 1;
+  if (std::abs(twonorm(M, &a[0]) - std::sqrt((FLT)M)) > relerr * std::sqrt((FLT)M)) return 1;
   b[0] = CPX(0.0, 0.0); // perturb b from a
-  if (abs(errtwonorm(M, &a[0], &b[0]) - 1.0) > relerr) return 1;
-  if (abs(sqrt((FLT)M) * relerrtwonorm(M, &a[0], &b[0]) - 1.0) > relerr) return 1;
+  if (std::abs(errtwonorm(M, &a[0], &b[0]) - 1.0) > relerr) return 1;
+  if (std::abs(std::sqrt((FLT)M) * relerrtwonorm(M, &a[0], &b[0]) - 1.0) > relerr) return 1;
 
 #ifdef SINGLE
   printf("testutilsf passed.\n");