Skip to content

Commit 3989d92

Browse files
committed
Merge branch 'develop'
2 parents 8d36780 + 8581e3a commit 3989d92

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

49 files changed

+1116
-1067
lines changed

doc/Sphinx/Understand/GPU_offloading.rst

+2
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ the announced exaflopic supercomputers will include GPUs.
2020
* Cartesian geometry in 1D, 2D and in 3D , for order 2
2121
* Diagnostics: Field, Probes, Scalar, ParticleBinning, TrackParticles
2222
* Moving Window
23+
* Boundary conditions for Fields: Periodic, Reflective and Silver-Muller are supported (no PML or BM)
24+
* Boundary conditions for Particles: Periodic, Reflective, thermal, remove and stop are supported
2325

2426
* A few key features remain to be implemented (AM geometry, ionization, PML, envelope,
2527
additional physics), but the fundamentals of the code are ported.

doc/Sphinx/Use/GPU_version.rst

+2-4
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,6 @@ This page contains the links of this documentation to compile and run SMILEI on
1616

1717
----
1818

19-
Known issues
20-
^^^^^^^^^^^^
19+
Important note:
2120

22-
2D and 3D runs may crash with A2000 & A6000 GPUs (used in laptops and workstations respectively,
23-
they are not 'production GPUs' which are designed for 64 bits floating point operations )
21+
The biggest challenge in running SMILEI on an accelerator is the correct installation of the openmpi library. It needs to be compiled with nvc++ after configuring (i.e. ./configure --options) with the appropriate options specific to your system.

doc/Sphinx/Use/install_linux_GPU.rst

+20
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,9 @@ First, make sure you have a recent version of CMAKE, and the other libraries
66
to compile Smilei on CPU as usual. In particular, for this example, you
77
need GCC <= 12.
88

9+
The installation protocol shown below uses the openmpi included in nvhpc. This approach often results in segfaults at runtime (note that nvidia will remove openmpi from nvhpc in the future).
10+
The "proper" way, which is much harder, consists of installing openmpi compiled with nvhpc (an example is given at the end of this page).
11+
912
Make a directory to store all the nvidia tools. We call it $NVDIR:
1013

1114
.. code:: bash
@@ -72,3 +75,20 @@ To run:
7275
7376
source nvidia_env.sh
7477
smilei namelist.py
78+
79+
80+
The following is an example of a "simple" openmpi installation.
81+
Openmpi dependencies such as zlib, hwloc and libevent should first be compiled with nvc++.
82+
83+
.. code:: bash
84+
export cuda=PATH_TO_YOUR_NVHPC_FOLDER/Linux_x86_64/24.5/cuda
85+
wget https://download.open-mpi.org/release/open-mpi/v4.1/openmpi-4.1.5.tar.gz
86+
tar -xzf openmpi-4.1.5.tar.gz
87+
cd openmpi-4.1.5
88+
mkdir build
89+
cd build
90+
CC=nvc++ CXX=nvc++ CFLAGS=-fPIC CXXFLAGS=-fPIC ../configure --with-hwloc --enable-mpirun-prefix-by-default --prefix=PATH_TO_openmpi/openmpi-4.1.5/build --enable-mpi-cxx --without-verbs --with-cuda=$cuda --disable-mpi-fortran --with-libevent=PATH_TO_libevent/libevent-2.1.12-stable/build
91+
make -j 4 all
92+
make install
93+
94+
Because configuring openmpi is complex, we recommend asking your supercomputer's support team for help with running Smilei on GPUs.

src/Collisions/BinaryProcesses.cpp

+3-3
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,7 @@ void BinaryProcesses::calculate_debye_length( Params &params, Patch *patch )
162162
// compute debye length squared in code units
163163
patch->debye_length_squared[ibin] = 1./inv_D2;
164164
// apply lower limit to the debye length (minimum interatomic distance)
165-
double rmin2 = pow( coeff*density_max, -2./3. );
165+
double rmin2 = 1.0 / cbrt( coeff*density_max * coeff*density_max ) ;
166166
if( patch->debye_length_squared[ibin] < rmin2 ) {
167167
patch->debye_length_squared[ibin] = rmin2;
168168
}
@@ -292,8 +292,8 @@ void BinaryProcesses::apply( Params &params, Patch *patch, int itime, vector<Dia
292292
double dt_corr = every_ * params.timestep * ((double)ncorr) * inv_cell_volume;
293293
n1 *= inv_cell_volume;
294294
n2 *= inv_cell_volume;
295-
D.n123 = pow( n1, 2./3. );
296-
D.n223 = pow( n2, 2./3. );
295+
D.n123 = cbrt(n1*n1);
296+
D.n223 = cbrt(n2*n2);
297297

298298
// Now start the real loop on pairs of particles
299299
// See equations in http://dx.doi.org/10.1063/1.4742167

src/Collisions/CollisionalIonization.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -248,7 +248,7 @@ void CollisionalIonization::calculate( double gamma_s, double gammae, double gam
248248
// Lose incident electron energy
249249
if( U2 < Wi/We ) {
250250
// Calculate the modified electron momentum
251-
double pr = sqrt( ( pow( gamma_s-e, 2 )-1. )/p2 );
251+
double pr = sqrt( ( ( gamma_s - e ) * ( gamma_s - e ) - 1. ) / p2 );
252252
pe->momentum( 0, ie ) *= pr;
253253
pe->momentum( 1, ie ) *= pr;
254254
pe->momentum( 2, ie ) *= pr;

src/Collisions/Collisions.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ Collisions::Collisions(
2222
coeff1_ = 4.046650232e-21*params.reference_angular_frequency_SI; // h*omega/(2*me*c^2)
2323
coeff2_ = 2.817940327e-15*params.reference_angular_frequency_SI/299792458.; // re omega / c
2424
coeff3_ = coeff2_ * coulomb_log_factor_;
25-
coeff4_ = pow( 3.*coeff2_, -1./3. );
25+
coeff4_ = 1.0 / cbrt( 3.*coeff2_);
2626
}
2727

2828

src/Diagnostic/DiagnosticScreen.cpp

+14-11
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ DiagnosticScreen::DiagnosticScreen(
7575
if( params.nDim_particle > 1 ) {
7676
screen_vector_a[0] = -screen_unitvector[1];
7777
screen_vector_a[1] = screen_unitvector[0];
78-
double norm = sqrt( pow( screen_vector_a[0], 2 ) + pow( screen_vector_a[1], 2 ) );
78+
double norm = sqrt( screen_vector_a[0] * screen_vector_a[0] + screen_vector_a[1] * screen_vector_a[1] );
7979
if( norm < 1.e-8 ) {
8080
screen_vector_a[0] = 0.;
8181
screen_vector_a[1] = 1.;
@@ -132,7 +132,7 @@ DiagnosticScreen::DiagnosticScreen(
132132
ERROR( errorPrefix << ": axis `theta` not available for `" << screen_shape << "` screen" );
133133
}
134134
for( idim=0; idim<params.nDim_particle; idim++ ) {
135-
coefficients[params.nDim_particle+idim] = screen_vector[idim] / pow( screen_vectornorm, 2 );
135+
coefficients[params.nDim_particle+idim] = screen_vector[idim] / ( screen_vectornorm * screen_vectornorm );
136136
}
137137
} else if( type == "phi" ) {
138138
if( screen_type == 0 ) {
@@ -187,7 +187,7 @@ void DiagnosticScreen::run( Patch *patch, int, SimWindow *simWindow )
187187
} else if( screen_type == 1 ) { // sphere
188188
double distance_to_center = 0.;
189189
for( unsigned int idim=0; idim<ndim; idim++ ) {
190-
distance_to_center += pow( patch->center_[idim] - screen_point[idim], 2 );
190+
distance_to_center += ( patch->center_[idim] - screen_point[idim] ) * ( patch->center_[idim] - screen_point[idim] );
191191
}
192192
distance_to_center = sqrt( distance_to_center );
193193
if( abs( screen_vectornorm - distance_to_center ) > patch->radius ) {
@@ -196,10 +196,10 @@ void DiagnosticScreen::run( Patch *patch, int, SimWindow *simWindow )
196196
} else if( screen_type == 2 ) { // cylinder
197197
double distance_to_axis = 0.;
198198
for( unsigned int idim=0; idim<ndim; idim++ ) {
199-
distance_to_axis += pow(
200-
( patch->center_[(idim+1)%ndim] - screen_point[(idim+1)%ndim] ) * screen_unitvector[(idim+2)%ndim]
201-
-( patch->center_[(idim+2)%ndim] - screen_point[(idim+2)%ndim] ) * screen_unitvector[(idim+1)%ndim]
202-
, 2 );
199+
200+
distance_to_axis += ( ( patch->center_[(idim+1)%ndim] - screen_point[(idim+1)%ndim] ) * screen_unitvector[(idim+2)%ndim]
201+
-( patch->center_[(idim+2)%ndim] - screen_point[(idim+2)%ndim] ) * screen_unitvector[(idim+1)%ndim] ) * ( ( patch->center_[(idim+1)%ndim] - screen_point[(idim+1)%ndim] ) * screen_unitvector[(idim+2)%ndim]
202+
-( patch->center_[(idim+2)%ndim] - screen_point[(idim+2)%ndim] ) * screen_unitvector[(idim+1)%ndim] );
203203
}
204204
distance_to_axis = sqrt( distance_to_axis );
205205
if( abs( screen_vectornorm - distance_to_axis ) > patch->radius ) {
@@ -260,8 +260,9 @@ void DiagnosticScreen::run( Patch *patch, int, SimWindow *simWindow )
260260
double side_old = 0.;
261261
double dtg = dt / s->particles->LorentzFactor( ipart );
262262
for( unsigned int idim=0; idim<ndim; idim++ ) {
263-
side += pow( s->particles->Position[idim][ipart] - screen_point[idim], 2 );
264-
side_old += pow( s->particles->Position[idim][ipart] - dtg*( s->particles->Momentum[idim][ipart] ) - screen_point[idim], 2 );
263+
side += ( s->particles->Position[idim][ipart] - screen_point[idim] ) * ( s->particles->Position[idim][ipart] - screen_point[idim] );
264+
side_old += ( s->particles->Position[idim][ipart] - dtg*( s->particles->Momentum[idim][ipart] ) - screen_point[idim] ) *
265+
( s->particles->Position[idim][ipart] - dtg*( s->particles->Momentum[idim][ipart] ) - screen_point[idim] ) ;
265266
}
266267
side = screen_vectornorm-sqrt( side );
267268
side_old = screen_vectornorm-sqrt( side_old );
@@ -284,10 +285,12 @@ void DiagnosticScreen::run( Patch *patch, int, SimWindow *simWindow )
284285
for( unsigned int idim=0; idim<ndim; idim++ ) {
285286
double u1 = s->particles->Position[(idim+1)%ndim][ipart] - screen_point[(idim+1)%ndim];
286287
double u2 = s->particles->Position[(idim+2)%ndim][ipart] - screen_point[(idim+2)%ndim];
287-
side += pow( u1 * screen_unitvector[(idim+2)%ndim] - u2 * screen_unitvector[(idim+1)%ndim], 2 );
288+
side += ( u1 * screen_unitvector[(idim+2)%ndim] - u2 * screen_unitvector[(idim+1)%ndim] ) *
289+
( u1 * screen_unitvector[(idim+2)%ndim] - u2 * screen_unitvector[(idim+1)%ndim] );
288290
u1 -= dtg * s->particles->Momentum[(idim+1)%ndim][ipart];
289291
u2 -= dtg * s->particles->Momentum[(idim+1)%ndim][ipart];
290-
side_old += pow( u1 * screen_unitvector[(idim+2)%ndim] - u2 * screen_unitvector[(idim+1)%ndim], 2 );
292+
side_old += ( u1 * screen_unitvector[(idim+2)%ndim] - u2 * screen_unitvector[(idim+1)%ndim] ) *
293+
( u1 * screen_unitvector[(idim+2)%ndim] - u2 * screen_unitvector[(idim+1)%ndim] );
291294
}
292295
side = r2 - side;
293296
side_old = r2 - side_old;

0 commit comments

Comments
 (0)