diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 000000000..069709970 --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +*.F90 diff=fortran diff --git a/.github/PULL_REQUEST_TEMPLATE b/.github/PULL_REQUEST_TEMPLATE index 83509033c..5195feed8 100644 --- a/.github/PULL_REQUEST_TEMPLATE +++ b/.github/PULL_REQUEST_TEMPLATE @@ -1,19 +1,29 @@ -[Remove this and add a short summary line]: -- Developer(s): - -- Please suggest code Pull Request reviewers in the column at right. - -- Are the code changes bit for bit, different at roundoff level, or more substantial? - -- Please include the link to test results or paste the summary block from the bottom of the testing output below. - -- Does this PR create or have dependencies on Icepack or any other models? - -- Is the documentation being updated with this PR? (Y/N) -If not, does the documentation need to be updated separately at a later time? (Y/N) - -Note: "Documentation" includes information on the wiki and .rst files in doc/source/, -which are used to create the online technical docs at https://readthedocs.org/projects/cice-consortium-cice/. - -- Other Relevant Details: +For detailed information about submitting Pull Requests (PRs) to the CICE-Consortium, +please refer to: + + +## PR checklist +- [ ] Short (1 sentence) summary of your PR: + ENTER INFORMATION HERE +- [ ] Developer(s): + ENTER INFORMATION HERE +- [ ] Suggest PR reviewers from list in the column to the right. +- [ ] Please copy the PR test results link or provide a summary of testing completed below. + ENTER INFORMATION HERE +- How much do the PR code changes differ from the unmodified code? + - [ ] bit for bit + - [ ] different at roundoff level + - [ ] more substantial +- Does this PR create or have dependencies on Icepack or any other models? + - [ ] Yes + - [ ] No +- Does this PR add any new test cases? + - [ ] Yes + - [ ] No +- Is the documentation being updated? ("Documentation" includes information on the wiki or in the .rst files from doc/source/, which are used to create the online technical docs at https://readthedocs.org/projects/cice-consortium-cice/.) + - [ ] Yes + - [ ] No, does the documentation need to be updated at a later time? + - [ ] Yes + - [ ] No +- [ ] Please provide any additional information or relevant details below: diff --git a/.travis.yml b/.travis.yml index d7e674cb3..b8098f3df 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,5 +1,7 @@ language: cpp +dist: xenial + sudo: false addons: @@ -9,8 +11,9 @@ addons: packages: - tcsh - pkg-config - - netcdf-bin libnetcdf-dev #libnetcdff-dev (only required on Debian) + - netcdf-bin libnetcdf-dev libnetcdff-dev - gfortran + - gcc - openmpi-bin libopenmpi-dev - wget #- lftp diff --git a/.zenodo.json b/.zenodo.json new file mode 100644 index 000000000..5eebe93e6 --- /dev/null +++ b/.zenodo.json @@ -0,0 +1,104 @@ +{ + "license": "other-open", + "description": "No description provided", + "language": "English", + "title": "CICE-Consortium/CICE: CICE Version m.n.p", + "keywords": [ + "sea ice model, CICE, Icepack" + ], + "version": "m.n.p", + "upload_type": "software", + "communities": [ + { + "identifier": "cice-consortium" + } + ], + "publication_date": "2019-07-25", + "creators": [ + { + "affiliation": "Los Alamos National Laboratory", + "name": "Elizabeth Hunke" + }, + { + "affiliation": "Naval Research Laboratory Stennis Space Center", + "name": "Richard Allard" + }, + { + "affiliation": "National Center for Atmospheric Research", + "name": "David A. Bailey" + }, + { + "affiliation": "Environment and Climate Change Canada", + "name": "Philippe Blain" + }, + { + "affiliation": "Environment and Climate Change Canada", + "name": "Amelie Bouchat" + }, + { + "affiliation": "National Oceanographic and Atmospheric Administration (CTR)", + "name": "Tony Craig" + }, + { + "affiliation": "Environment and Climate Change Canada", + "name": "Frederic Dupont" + }, + { + "affiliation": "National Center for Atmospheric Research", + "name": "Alice DuVivier" + }, + { + "affiliation": "National Oceanographic and Atmospheric Administration", + "name": "Robert Grumbine" + }, + { + "affiliation": "Naval Research Laboratory Stennis Space Center", + "name": "David Hebert" + }, + { + "affiliation": "National Center for Atmospheric Research", + "name": "Marika Holland" + }, + { + "affiliation": "Los Alamos National Laboratory", + "name": "Nicole Jeffery" + }, + { + "affiliation": "Environment and Climate Change Canada", + "name": "Jean-Francois Lemieux" + }, + { + "affiliation": "Danish Meteorological Institute", + "name": "Till Rasmussen" + }, + { + "affiliation": "Danish Meteorological Institute", + "name": "Mads Ribergaard" + }, + { + "affiliation": "Los Alamos National Laboratory", + "name": "Andrew Roberts" + }, + { + "affiliation": "Naval Research Laboratory Stennis Space Center (CTR)", + "name": "Matthew Turner" + }, + { + "affiliation": "Geophysical Fluid Dynamics Laboratory", + "name": "Michael Winton" + } + ], + "access_right": "open", + "related_identifiers": [ + { + "scheme": "url", + "identifier": "https://github.com/CICE-Consortium/CICE/tree/CICE6.0.1", + "relation": "isSupplementTo" + }, + { + "scheme": "doi", + "identifier": "10.5281/zenodo.1205674", + "relation": "isVersionOf" + } + ] +} \ No newline at end of file diff --git a/cice.setup b/cice.setup index a8f8d8e90..7380def71 100755 --- a/cice.setup +++ b/cice.setup @@ -19,6 +19,7 @@ set test = ${spval} set grid = gx3 set pesx = 4x1 set sets = "" +set tdir = ${spval} set bdir = ${spval} set testid = ${spval} set testsuite = ${spval} @@ -73,10 +74,11 @@ SYNOPSIS --test TEST -m MACH --testid ID [-e ENV][-p MxN][-g GRID][-s SET1,SET2][--acct ACCT] [--diff TESTNAME][--bdir DIR][--bgen DIR][--bcmp DIR] + [--tdir PATH] --suite SUITE[,SUITE2] -m MACH --testid ID [-e ENV1,ENV2][--acct ACCT][--bdir DIR][--bgen DIR] - [--bcmp DIR][--report] + [--bcmp DIR][--tdir PATH][--report] DESCRIPTION --help, -h : help @@ -95,9 +97,10 @@ DESCRIPTION --test : test, test name (not with --case or --suite) --suite : test suite, pre-defined set or sets of tests, comma separated (not with --case or --test) - --bdir : top baseline directory, default ICE_MACHINE_BASELINE - --bgen : baselines directory where output from current tests are copied - --bcmp : baselines directory where output from current tests are compared + --tdir : directory name where tests will be located + --bdir : baseline directory for regression testing, default ICE_MACHINE_BASELINE + --bgen : directory name where output from current tests are copied + --bcmp : directory name where output from current tests are compared --testid : test ID, user-defined id for testing (REQUIRED with --test or --suite) --diff : generate comparison against another case --report : automatically post results when tests are complete @@ -234,6 +237,8 @@ while (1) else if ("$option" == "--test") then set test = $argv[1] set dotest = 1 + else if ("$option" == "--tdir") then + set tdir = $argv[1] else if ("$option" == "--grid" || "$option" == "-g") then set grid = $argv[1] else if ("$option" == "--queue") then @@ -314,11 +319,24 @@ if ( ${dosuite} == 1 ) then set tsdir = "testsuite.${testid}" set tsfile = "testsuite.${testid}.${sdate}-${stime}.list" endif +if ( ${tdir} != ${spval} ) then + set tsdir = ${tdir} +endif if (-e $tsfile) then echo "${0}: ERROR in tsfile, this should never happen" exit -1 endif +set remote = `git remote -v | grep -i origin | grep -i push | sed "s|.*\(https.*\)\s.*|\1|g"` +set branch = `git status | grep -i "on branch" | sed 's|^.*ranch\s*\(\S*\)$|\1|g'` +set hash = `git log | grep -i commit | head -1 | cut -d " " -f 2-` +set hashuser = `git log | grep -i author | head -1 | cut -d : -f 2-` +set hashdate = `git log | grep -i date | head -1 | cut -d : -f 2-` +set cdate = `date -u "+%Y-%m-%d"` +set ctime = `date -u "+%H:%M:%S"` +set vers = ${ICE_VERSION} +set shhash = `echo ${hash} | cut -c 1-10` + if ( ${dosuite} == 0 ) then set teststring = "${test} ${grid} ${pesx} ${sets}" if ( $bfbcomp != ${spval} ) then @@ -348,34 +366,29 @@ else endif end - if (-e ./${tsdir}) then + if (-e ${tsdir}) then echo "${0}: ERROR, ${tsdir} already exists" exit -1 endif - mkdir ./${tsdir} - cp -f ${ICE_SCRIPTS}/tests/report_results.csh ./${tsdir} - cp -f ${ICE_SCRIPTS}/tests/timeseries.csh ./${tsdir} - - if ($report == 1) then - cp -f ${ICE_SCRIPTS}/tests/poll_queue.csh ./${tsdir} + mkdir -p ${tsdir} + if ($status != 0) then + echo "${0}: ERROR, mkdir ${tsdir} aborted" + exit -1 endif + cp -f ${ICE_SCRIPTS}/tests/report_results.csh ${tsdir} + cp -f ${ICE_SCRIPTS}/tests/poll_queue.csh ${tsdir} -cat >! ./${tsdir}/suite.run << EOF0 + foreach file (${tsdir}/suite.run ${tsdir}/suite.submit) + cat >! $file << EOF0 #!/bin/csh -f + +set nonomatch && rm -f ciceexe.* && unset nonomatch + EOF0 + end - set remote = `git remote -v | grep -i origin | grep -i push | sed "s|.*\(https.*\)\s.*|\1|g"` - set branch = `git status | grep -i "on branch" | sed 's|^.*ranch\s*\(\S*\)$|\1|g'` - set hash = `git log | grep -i commit | head -1 | cut -d " " -f 2-` - set hashuser = `git log | grep -i author | head -1 | cut -d : -f 2-` - set hashdate = `git log | grep -i date | head -1 | cut -d : -f 2-` - set cdate = `date -u "+%Y-%m-%d"` - set ctime = `date -u "+%H:%M:%S"` - set vers = ${ICE_VERSION} - set shhash = `echo ${hash} | cut -c 1-10` - -cat >! ./${tsdir}/results.csh << EOF0 +cat >! ${tsdir}/results.csh << EOF0 #!/bin/csh -f rm -f results.log echo "#------- " >> results.log @@ -393,8 +406,9 @@ echo "#vers = ${vers}" >> results.log echo "#------- " >> results.log EOF0 - chmod +x ./${tsdir}/suite.run - chmod +x ./${tsdir}/results.csh + chmod +x ${tsdir}/suite.run + chmod +x ${tsdir}/suite.submit + chmod +x ${tsdir}/results.csh endif @@ -418,23 +432,40 @@ if (${doabort} == true) then exit -1 endif +# Create a new sets_base variable to store sets passed to cice.setup set sets_base = "${sets}" set bfbcomp_base = "$bfbcomp" foreach compiler ( $ncompilers ) set machcomp = ${machine}_${compiler} foreach line ( "`cat $tsfile`" ) + # Check if line is a comment line if ( $line:q =~ '#'* || $line:q =~ '$'* || $line:q =~ '!'* ) then echo "skipping line: $line" continue + # Check if line is a sleep line, can only happen with suites + else if ( $line:q =~ 'sleep'*) then +cat >> ${tsdir}/suite.submit << EOF +echo "-------test--------------" +echo "$line:q" +$line:q + +EOF + echo "adding sleep line: $line" + echo "" + echo "---" + echo "" + continue endif + + source ${ICE_SCRIPTS}/machines/env.${machcomp} -nomodules || exit 2 + # Obtain the test name, sets, grid, and PE information from .ts file set test = `echo $line | cut -d' ' -f1` set grid = `echo $line | cut -d' ' -f2` set pesx = `echo $line | cut -d' ' -f3` set sets_tmp = `echo $line | cut -d' ' -f4` set bfbcomp_tmp = `echo $line | cut -d' ' -f5` - # Create a new sets_base variable to store sets passed to cice.setup # Append sets from .ts file to the $sets variable set sets = "$sets_base,$sets_tmp" @@ -449,7 +480,62 @@ foreach compiler ( $ncompilers ) set fbfbcomp = ${spval} if ($bfbcomp != ${spval}) then - set fbfbcomp = ${machcomp}_${bfbcomp}.${testid} + set fbfbcomp = ${machcomp}_${bfbcomp} + endif + + #------------------------------------------------------------ + # Parse pesx with strict checking, limit pes for machine + + set chck = `echo ${pesx} | sed 's/^[0-9][0-9]*x[0-9][0-9]*x[0-9][0-9]*x[0-9][0-9]*x[0-9][0-9]*$/OK/'` + if (${chck} == OK) then + set task = `echo ${pesx} | cut -d x -f 1` + set thrd = `echo ${pesx} | cut -d x -f 2` + set blckx = `echo ${pesx} | cut -d x -f 3` + set blcky = `echo ${pesx} | cut -d x -f 4` + set mblck = `echo ${pesx} | cut -d x -f 5` + if ($?ICE_MACHINE_MAXPES) then + @ pesreq = ${task} * ${thrd} + if (${pesreq} > ${ICE_MACHINE_MAXPES}) then + @ task = ${ICE_MACHINE_MAXPES} / ${thrd} + @ mblck = ${mblck} * ((${pesreq} / ${ICE_MACHINE_MAXPES}) + 1) + endif + endif + set pesx = ${task}x${thrd}x${blckx}x${blcky}x${mblck} + else + set chck = `echo ${pesx} | sed 's/^[0-9][0-9]*x[0-9][0-9]*x[0-9][0-9]*x[0-9][0-9]*$/OK/'` + if (${chck} == OK) then + set task = `echo ${pesx} | cut -d x -f 1` + set thrd = `echo ${pesx} | cut -d x -f 2` + set blckx = `echo ${pesx} | cut -d x -f 3` + set blcky = `echo ${pesx} | cut -d x -f 4` + set mblck = 0 + if ($?ICE_MACHINE_MAXPES) then + @ pesreq = ${task} * ${thrd} + if (${pesreq} > ${ICE_MACHINE_MAXPES}) then + @ task = ${ICE_MACHINE_MAXPES} / ${thrd} + endif + endif + set pesx = ${task}x${thrd}x${blckx}x${blcky} + else + set chck = `echo ${pesx} | sed 's/^[0-9][0-9]*x[0-9][0-9]*$/OK/'` + if (${chck} == OK) then + set task = `echo ${pesx} | cut -d x -f 1` + set thrd = `echo ${pesx} | cut -d x -f 2` + set blckx = 0 + set blcky = 0 + set mblck = 0 + if ($?ICE_MACHINE_MAXPES) then + @ pesreq = ${task} * ${thrd} + if (${pesreq} > ${ICE_MACHINE_MAXPES}) then + @ task = ${ICE_MACHINE_MAXPES} / ${thrd} + endif + endif + set pesx = ${task}x${thrd} + else + echo "${0}: ERROR in -p argument, ${pesx}, must be [m]x[n], [m]x[n]x[bx]x[by], or [m]x[n]x[bx]x[by]x[mb] " + exit -1 + endif + endif endif set testname_noid = ${spval} @@ -468,11 +554,7 @@ foreach compiler ( $ncompilers ) # soptions starts with _ set testname_noid = "${machcomp}_${test}_${grid}_${pesx}${soptions}" set testname_base = "${machcomp}_${test}_${grid}_${pesx}${soptions}.${testid}" - if (${dosuite} == 1) then - set testname = "${tsdir}/$testname_base" - else - set testname = "$testname_base" - endif + set testname = "${tsdir}/${testname_base}" set case = ${testname} endif @@ -492,7 +574,7 @@ foreach compiler ( $ncompilers ) # Setup case directory, copy files to case directory mkdir -p ${case} - echo "`date`${0} $initargv[*]" >> ${case}/README.case + echo "`date` ${0} $initargv[*]" >> ${case}/README.case cd ${case} set casedir = `pwd` @@ -507,7 +589,7 @@ foreach compiler ( $ncompilers ) endif # from basic script dir to case - foreach file (cice.build cice.settings Makefile ice_in makdep.c setup_run_dirs.csh) + foreach file (cice.build cice.settings Makefile ice_in makdep.c setup_run_dirs.csh timeseries.csh timeseries.py) if !(-e ${ICE_SCRIPTS}/$file) then echo "${0}: ERROR, ${ICE_SCRIPTS}/$file not found" exit -1 @@ -534,7 +616,6 @@ foreach compiler ( $ncompilers ) end cd ${casedir} - source ./env.${machcomp} -nomodules || exit 2 set quietmode = false if ($?ICE_MACHINE_QUIETMODE) then @@ -560,36 +641,6 @@ foreach compiler ( $ncompilers ) #------------------------------------------------------------ # Compute a default blocksize - set chck = `echo ${pesx} | sed 's/^[0-9][0-9]*x[0-9][0-9]*x[0-9][0-9]*x[0-9][0-9]*x[0-9][0-9]*$/OK/'` - if (${chck} == OK) then - set task = `echo ${pesx} | cut -d x -f 1` - set thrd = `echo ${pesx} | cut -d x -f 2` - set blckx = `echo ${pesx} | cut -d x -f 3` - set blcky = `echo ${pesx} | cut -d x -f 4` - set mblck = `echo ${pesx} | cut -d x -f 5` - else - set chck = `echo ${pesx} | sed 's/^[0-9][0-9]*x[0-9][0-9]*x[0-9][0-9]*x[0-9][0-9]*$/OK/'` - if (${chck} == OK) then - set task = `echo ${pesx} | cut -d x -f 1` - set thrd = `echo ${pesx} | cut -d x -f 2` - set blckx = `echo ${pesx} | cut -d x -f 3` - set blcky = `echo ${pesx} | cut -d x -f 4` - set mblck = 0 - else - set chck = `echo ${pesx} | sed 's/^[0-9][0-9]*x[0-9][0-9]*$/OK/'` - if (${chck} == OK) then - set task = `echo ${pesx} | cut -d x -f 1` - set thrd = `echo ${pesx} | cut -d x -f 2` - set blckx = 0 - set blcky = 0 - set mblck = 0 - else - echo "${0}: ERROR in -p argument, ${pesx}, must be [m]x[n], [m]x[n]x[bx]x[by], or [m]x[n]x[bx]x[by]x[mb] " - exit -1 - endif - endif - endif - setenv ICE_DECOMP_GRID ${grid} setenv ICE_DECOMP_NTASK ${task} setenv ICE_DECOMP_NTHRD ${thrd} @@ -678,6 +729,7 @@ setenv ICE_SPVAL ${spval} setenv ICE_QUIETMODE ${quietmode} setenv ICE_TEST ${test} setenv ICE_TESTNAME ${testname_noid} +setenv ICE_TESTID ${testid} setenv ICE_BFBCOMP ${fbfbcomp} setenv ICE_ACCOUNT ${acct} setenv ICE_QUEUE ${queue} @@ -690,7 +742,7 @@ EOF1 foreach name ($setsxorig) if (-e ${ICE_SCRIPTS}/options/set_files.${name}) then echo "adding options files from set_files.${name}" - echo "`date`${0} adding options files from set_files.${name}" >> ${casedir}/README.case + echo "`date` ${0} adding options files from set_files.${name}" >> ${casedir}/README.case set setsnew = `cat ${ICE_SCRIPTS}/options/set_files.${name}` foreach nset ($setsnew) if ($nset !~ "#*") then @@ -708,9 +760,10 @@ EOF1 if (${docase} == 0) then # from test options to casescr in case any test time changes are applied + cp -f -p ${ICE_SCRIPTS}/tests/comparebfb.csh ${casescr} + cp -f -p ${ICE_SCRIPTS}/tests/comparelog.csh ${casescr} if (-e ${ICE_SCRIPTS}/tests/test_${test}.files) then cp -f -p ${ICE_SCRIPTS}/tests/test_${test}.files ${casescr} - cp -f -p ${ICE_SCRIPTS}/tests/comparebfb.csh ${casescr} foreach file (`cat ${casescr}/test_${test}.files`) if (-e ${ICE_SCRIPTS}/options/$file) then cp -f -p ${ICE_SCRIPTS}/options/$file ${casescr} @@ -737,7 +790,7 @@ cat >> ${fimods} << EOF2 EOF2 echo "adding namelist mods set_nml.${name}" - echo "`date`${0} adding namelist modes set_nml.${name}" >> ${casedir}/README.case + echo "`date` ${0} adding namelist modes set_nml.${name}" >> ${casedir}/README.case set found = 1 endif if (-e ${ICE_SCRIPTS}/options/set_env.${name}) then @@ -755,7 +808,7 @@ cat >> ${fimods} << EOF2 EOF2 echo "adding env mods set_env.${name}" - echo "`date`${0} adding namelist modes set_env.${name}" >> ${casedir}/README.case + echo "`date` ${0} adding namelist modes set_env.${name}" >> ${casedir}/README.case set found = 1 endif if (${found} == 0) then @@ -807,13 +860,13 @@ EOF2 if ( ${dosuite} == 1 ) then cd ${ICE_SANDBOX} - # Write build and run commands to suite.run + # Write build and run commands to suite.run and suite.submit -cat >> ./${tsdir}/results.csh << EOF -cat $testname_base/test_output >> results.log +cat >> ${tsdir}/results.csh << EOF +cat ${testname_base}/test_output >> results.log EOF - -cat >> ./${tsdir}/suite.run << EOF + foreach file (${tsdir}/suite.run ${tsdir}/suite.submit) + cat >> $file << EOF echo "-------test--------------" echo "${testname_base}" cd ${testname_base} @@ -821,10 +874,20 @@ source ./cice.settings set ciceexe = "../ciceexe.\${ICE_COMPILER}.\${ICE_COMMDIR}.\${ICE_BLDDEBUG}.\${ICE_THREADED}.\${ICE_IOTYPE}" ./cice.build \${ciceexe} if !(-e \${ciceexe}) cp -p \${ICE_RUNDIR}/cice \${ciceexe} -./cice.submit +EOF + end + cat >> ${tsdir}/suite.submit << EOF +./cice.submit | tee -a ../suite.jobs +EOF + cat >> ${tsdir}/suite.run << EOF +./cice.test +EOF + foreach file (${tsdir}/suite.run ${tsdir}/suite.submit) + cat >> $file << EOF cd .. EOF + end # Reset case for the next test in suite set case = ${spval} @@ -845,7 +908,7 @@ end if ( ${dosuite} == 1 ) then # Add code to results.csh to count the number of failures -cat >> ./${tsdir}/results.csh << EOF +cat >> ${tsdir}/results.csh << EOF cat ./results.log set pends = \`cat ./results.log | grep PEND | wc -l\` set failures = \`cat ./results.log | grep FAIL | wc -l\` @@ -875,9 +938,15 @@ echo "\$pends of \$total tests PENDING" exit \$failures EOF + if ($?ICE_MACHINE_QSTAT) then +cat >! ${tsdir}/poll_queue.env << EOF0 +setenv ICE_MACHINE_QSTAT ${ICE_MACHINE_QSTAT} +EOF0 + endif + # build and submit tests cd ${tsdir} - ./suite.run | tee suite.log + ./suite.submit | tee suite.log if ($report == 1) then echo "Reporting results" ./poll_queue.csh diff --git a/cicecore/cicedynB/dynamics/ice_dyn_evp.F90 b/cicecore/cicedynB/dynamics/ice_dyn_evp.F90 index d26f2d4c9..c1371db7a 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_evp.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_evp.F90 @@ -34,6 +34,7 @@ module ice_dyn_evp use ice_kinds_mod + use ice_communicate, only: my_task use ice_constants, only: field_loc_center, field_loc_NEcorner, & field_type_scalar, field_type_vector use ice_constants, only: c0, c4, p027, p055, p111, p166, & @@ -92,7 +93,7 @@ subroutine evp (dt) use ice_timers, only: timer_dynamics, timer_bound, & ice_timer_start, ice_timer_stop, timer_evp_1d, timer_evp_2d use ice_dyn_evp_1d - use ice_dyn_shared, only: evp_kernel_ver + use ice_dyn_shared, only: kevp_kernel real (kind=dbl_kind), intent(in) :: & dt ! time step @@ -346,14 +347,14 @@ subroutine evp (dt) !$OMP END PARALLEL DO endif call ice_timer_start(timer_evp_2d) - if (evp_kernel_ver > 0) then - !write(*,*)'Entering evp_kernel version ',evp_kernel_ver + if (kevp_kernel > 0) then +! if (my_task == 0) write(nu_diag,*) subname,' Entering kevp_kernel version ',kevp_kernel if (trim(grid_type) == 'tripole') then - call abort_ice('(ice_dyn_evp): & - & Kernel not tested on tripole grid. Set evp_kernel_ver=0') + call abort_ice(trim(subname)//' & + & Kernel not tested on tripole grid. Set kevp_kernel=0') endif call evp_copyin( & - nx_block,ny_block,nblocks,nx_global+2*nghost,ny_global+2*nghost,& + nx_block,ny_block,nblocks,nx_global+2*nghost,ny_global+2*nghost, & HTE,HTN, & !v1 dxhy,dyhx,cyp,cxp,cym,cxm,tinyarea, & !v1 waterx,watery, & @@ -364,15 +365,15 @@ subroutine evp (dt) stressp_1 ,stressp_2, stressp_3, stressp_4, & stressm_1 ,stressm_2, stressm_3, stressm_4, & stress12_1,stress12_2,stress12_3,stress12_4 ) - if (evp_kernel_ver == 2) then + if (kevp_kernel == 2) then call ice_timer_start(timer_evp_1d) call evp_kernel_v2() call ice_timer_stop(timer_evp_1d) -!v1 else if (evp_kernel_ver == 1) then +!v1 else if (kevp_kernel == 1) then !v1 call evp_kernel_v1() else - write(*,*)'Kernel: evp_kernel_ver = ',evp_kernel_ver - call abort_ice('(ice_dyn_evp): Kernel not implemented.') + if (my_task == 0) write(nu_diag,*) subname,' ERROR: kevp_kernel = ',kevp_kernel + call abort_ice(subname//' kevp_kernel not supported.') endif call evp_copyout( & nx_block,ny_block,nblocks,nx_global+2*nghost,ny_global+2*nghost,& @@ -383,7 +384,7 @@ subroutine evp (dt) stress12_1,stress12_2,stress12_3,stress12_4, & divu,rdg_conv,rdg_shear,shear,taubx,tauby ) - else ! evp_kernel_ver == 0 (Standard CICE) + else ! kevp_kernel == 0 (Standard CICE) do ksub = 1,ndte ! subcycling @@ -461,7 +462,7 @@ subroutine evp (dt) !$OMP END PARALLEL DO enddo ! subcycling - endif ! evp_kernel_ver + endif ! kevp_kernel call ice_timer_stop(timer_evp_2d) deallocate(fld2) diff --git a/cicecore/cicedynB/dynamics/ice_dyn_evp_1d.F90 b/cicecore/cicedynB/dynamics/ice_dyn_evp_1d.F90 index 3ba03e0a8..b1f162967 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_evp_1d.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_evp_1d.F90 @@ -1,3 +1,10 @@ +! ice_dyn_evp_1d +! +! Contains 3 Fortran modules, +! * dmi_omp +! * bench_v2 +! * ice_dyn_evp_1d +! ! Modules used for: ! * convert 2D arrays into 1D vectors ! * Do stress/stepu/halo_update interations @@ -21,24 +28,22 @@ !=============================================================================== module dmi_omp - !- directives ---------------------------------------------------------------- + use ice_kinds_mod + use ice_fileunits, only: nu_diag + use ice_exit, only: abort_ice + implicit none private - INTEGER, PARAMETER :: JPIM = SELECTED_INT_KIND(9) + public :: domp_init, domp_get_domain, domp_get_thread_no - !- interfaces ---------------------------------------------------------------- interface domp_get_domain module procedure domp_get_domain_rlu end interface + INTEGER, PARAMETER :: JPIM = SELECTED_INT_KIND(9) integer(int_kind), private :: domp_iam, domp_nt - private :: domp_get_domain_rlu - - !- public vars & methods ----------------------------------------------------- - public :: domp_init, domp_get_domain, domp_get_thread_no - #if defined (_OPENMP) ! Please note, this constant will create a compiler info for a constant ! expression in IF statements: @@ -47,8 +52,8 @@ module dmi_omp #endif contains - - ! ---------------------------------------------------------------------------- + +!---------------------------------------------------------------------------- subroutine domp_init(nt_out) @@ -57,9 +62,11 @@ subroutine domp_init(nt_out) #endif use ice_forcing, only : dbug - !- argument(s) ------------------------------------------------------------- integer(int_kind), intent(out) :: nt_out + character(len=*), parameter :: subname = '(domp_init)' + !--------------------------------------- + !$OMP PARALLEL DEFAULT(none) #if defined (_OPENMP) domp_iam = omp_get_thread_num() @@ -74,21 +81,21 @@ subroutine domp_init(nt_out) if (dbug) then #if defined (_OPENACC) - write(*,'(a27)') 'Build with openACC support' + write(nu_diag,'(2a)') subname,' Build with openACC support' !#elif defined (_OPENMP) -! write(*,'(a26)') 'Build with openMP support' +! write(nu_diag,'(2a)') subname,' Build with openMP support' !#else -! write(*,'(a41)') 'Build without openMP and openACC support' +! write(nu_diag,'(2a)') subname,' Build without openMP and openACC support' #endif !- echo #threads: if (domp_nt > 1) then - write(*,'(a20,i5,a8)') 'Running openMP with ', domp_nt, ' threads' + write(nu_diag,'(2a,i5,a)') subname,' Running openMP with ', domp_nt, ' threads' else #if defined (_OPENMP) - write(*,'(a35)') 'Running openMP with a single thread' + write(nu_diag,'(2a)') subname,' Running openMP with a single thread' #else - write(*,'(a22)') 'Running without openMP' + write(nu_diag,'(2a)') subname,' Running without openMP' #endif endif endif @@ -98,24 +105,27 @@ subroutine domp_init(nt_out) end subroutine domp_init - ! ---------------------------------------------------------------------------- +!---------------------------------------------------------------------------- subroutine domp_get_domain_rlu(lower,upper,d_lower,d_upper) + #if defined (_OPENMP) use omp_lib, only : omp_in_parallel #endif use ice_constants, only: p5 - !- arguments --------------------------------------------------------------- integer(KIND=JPIM), intent(in) :: lower,upper integer(KIND=JPIM), intent(out) :: d_lower,d_upper #if defined (_OPENMP) - ! local variables --------------------------------------------------------- + !-- local variables real(kind=dbl_kind) :: dlen integer(int_kind) :: lr, ur #endif + character(len=*), parameter :: subname = '(domp_get_domain_rlu)' + !--------------------------------------- + ! proper action in "null" cases: if (upper <= 0 .or. upper < lower) then d_lower = 0 @@ -136,34 +146,53 @@ subroutine domp_get_domain_rlu(lower,upper,d_lower,d_upper) #endif if (.false.) then - write(*,'(a14,i3,a24,i10,i10)') 'openMP thread ', domp_iam, & + write(nu_diag,'(2a,i3,a,2i10)') subname,' openMP thread ', domp_iam, & ' handles range: ', d_lower, d_upper endif end subroutine domp_get_domain_rlu +!---------------------------------------------------------------------------- + subroutine domp_get_thread_no (tnum) - implicit none + implicit none integer(int_kind), intent(out) :: tnum + character(len=*), parameter :: subname = '(domp_get_thread_no)' tnum = domp_iam + 1 end subroutine domp_get_thread_no - ! ---------------------------------------------------------------------------- + +!---------------------------------------------------------------------------- + end module dmi_omp + !=============================================================================== +!=============================================================================== + module bench_v2 - !- interfaces ---------------------------------------------------------------- - interface stress + + use ice_fileunits, only: nu_diag + use ice_exit, only: abort_ice + + implicit none + private + public :: evp1d_stress, evp1d_stepu, evp1d_halo_update + + interface evp1d_stress module procedure stress_i module procedure stress_l end interface - interface stepu + interface evp1d_stepu module procedure stepu_iter module procedure stepu_last end interface + contains + +!---------------------------------------------------------------------------- + subroutine stress_i(NA_len, & ee,ne,se,lb,ub,uvel,vvel,dxt,dyt, & hte,htn,htem1,htnm1, & @@ -171,15 +200,15 @@ subroutine stress_i(NA_len, & stressm_1,stressm_2,stressm_3,stressm_4,stress12_1, & stress12_2,stress12_3,stress12_4,str1,str2,str3,str4,str5, & str6,str7,str8) - !- modules ------------------------------------------------------------------- + use ice_kinds_mod use dmi_omp, only : domp_get_domain use ice_constants, only: p027, p055, p111, p166, p222, p25, p333, p5, c1p5, c1 use icepack_parameters, only: puny use ice_dyn_shared, only: ecci, denom1, arlx1i, Ktens, revp - !- directives ---------------------------------------------------------------- + implicit none - ! arguments ------------------------------------------------------------------ + integer (kind=int_kind), intent(in) :: NA_len integer (kind=int_kind), intent(in) :: lb,ub integer (kind=int_kind), dimension(:), intent(in), contiguous :: ee,ne,se @@ -191,7 +220,9 @@ subroutine stress_i(NA_len, & stressm_3,stressm_4, stress12_1,stress12_2,stress12_3, stress12_4 real (kind=DBL_KIND), dimension(:), intent(out), contiguous :: & str1,str2,str3,str4,str5,str6,str7,str8 - ! local variables ------------------------------------------------------------ + + !-- local variables + integer (kind=int_kind) :: iw,il,iu real (kind=DBL_KIND) :: & divune, divunw, divuse, divusw,tensionne, tensionnw, tensionse, tensionsw, & @@ -205,6 +236,9 @@ subroutine stress_i(NA_len, & tmp_vvel_ne, tmp_uvel_ne, tmp_uvel_se real (kind=DBL_KIND) :: dxhy,dyhx,cxp,cyp,cxm,cym,tinyarea + character(len=*), parameter :: subname = '(stress_i)' + !--------------------------------------- + #ifdef _OPENACC !$acc parallel & !$acc present(ee,ne,se,strength,uvel,vvel,dxt,dyt, & @@ -419,8 +453,11 @@ subroutine stress_i(NA_len, & #ifdef _OPENACC !$acc end parallel #endif + end subroutine stress_i +!---------------------------------------------------------------------------- + subroutine stress_l(NA_len, tarear, & ee,ne,se,lb,ub,uvel,vvel,dxt,dyt, & hte,htn,htem1,htnm1, & @@ -429,15 +466,15 @@ subroutine stress_l(NA_len, tarear, & stress12_2,stress12_3,stress12_4, & divu,rdg_conv,rdg_shear,shear, & str1,str2,str3,str4,str5,str6,str7,str8 ) - !- modules ------------------------------------------------------------------- + use ice_kinds_mod use dmi_omp, only : domp_get_domain use ice_constants, only: p027, p055, p111, p166, p222, p25, p333, p5, c1p5, c0, c1 use icepack_parameters, only: puny use ice_dyn_shared, only: ecci, denom1, arlx1i, Ktens, revp - !- directives ---------------------------------------------------------------- + implicit none - ! arguments ------------------------------------------------------------------ + integer (kind=int_kind), intent(in) :: NA_len integer (kind=int_kind), intent(in) :: lb,ub integer (kind=int_kind), dimension(:), intent(in), contiguous :: ee,ne,se @@ -451,7 +488,9 @@ subroutine stress_l(NA_len, tarear, & str1,str2,str3,str4,str5,str6,str7,str8 real (kind=dbl_kind), dimension(:), intent(out), contiguous :: & divu,rdg_conv,rdg_shear,shear - ! local variables ------------------------------------------------------------ + + !-- local variables + integer (kind=int_kind) :: iw,il,iu real (kind=DBL_KIND) :: & divune, divunw, divuse, divusw,tensionne, tensionnw, tensionse, tensionsw, & @@ -465,6 +504,9 @@ subroutine stress_l(NA_len, tarear, & tmp_vvel_ne, tmp_uvel_ne, tmp_uvel_se real (kind=DBL_KIND) :: dxhy,dyhx,cxp,cyp,cxm,cym,tinyarea + character(len=*), parameter :: subname = '(stress_l)' + !--------------------------------------- + #ifdef _OPENACC !$acc parallel & !$acc present(ee,ne,se,strength,uvel,vvel,dxt,dyt,tarear, & @@ -687,18 +729,20 @@ subroutine stress_l(NA_len, tarear, & #endif end subroutine stress_l +!---------------------------------------------------------------------------- + subroutine stepu_iter(NA_len,rhow, & lb,ub,Cw,aiu,uocn,vocn,forcex,forcey,umassdti,fm,uarear,Tbu, & uvel_init,vvel_init,uvel,vvel, & str1,str2,str3,str4,str5,str6,str7,str8, nw,sw,se,skipme) - !- modules ------------------------------------------------------------------- + use ice_kinds_mod use dmi_omp, only : domp_get_domain use ice_dyn_shared, only: brlx, revp use ice_constants, only: c0, c1 - !- directives ---------------------------------------------------------------- + implicit none - ! arguments ------------------------------------------------------------------ + integer (kind=int_kind), intent(in) :: NA_len real (kind=dbl_kind), intent(in) :: rhow integer(kind=int_kind),intent(in) :: lb,ub @@ -714,7 +758,9 @@ subroutine stepu_iter(NA_len,rhow, & real (kind=dbl_kind), parameter :: & cosw = c1 , & ! cos(ocean turning angle) ! turning angle = 0 sinw = c0 - ! local variables + + !-- local variables + integer (kind=int_kind) :: iw,il,iu real (kind=dbl_kind) :: uold, vold, vrel,cca,ccb,ab2,cc1,cc2,taux,tauy,Cb real (kind=dbl_kind) :: tmp_str2_nw,tmp_str3_se,tmp_str4_sw, tmp_strintx @@ -722,6 +768,9 @@ subroutine stepu_iter(NA_len,rhow, & real (kind=dbl_kind) :: waterx,watery real (kind=dbl_kind) :: u0 = 5.e-5_dbl_kind ! residual velocity for basal stress (m/s) + character(len=*), parameter :: subname = '(stepu_iter)' + !--------------------------------------- + #ifdef _OPENACC !$acc parallel & !$acc present(Cw,aiu,uocn,vocn,forcex,forcey,umassdti,fm,uarear,Tbu, & @@ -770,20 +819,22 @@ subroutine stepu_iter(NA_len,rhow, & end subroutine stepu_iter +!---------------------------------------------------------------------------- + subroutine stepu_last(NA_len, rhow, & lb,ub,Cw,aiu,uocn,vocn,forcex,forcey,umassdti,fm,uarear,Tbu, & strintx,strinty,taubx,tauby, & uvel_init,vvel_init,uvel,vvel, & str1,str2,str3,str4,str5,str6,str7,str8, nw,sw,se,skipme) - !- modules ------------------------------------------------------------------- + use ice_kinds_mod use dmi_omp, only : domp_get_domain use ice_constants, only: c0, c1 use icepack_parameters, only: puny use ice_dyn_shared, only: brlx, revp, basalstress - !- directives ---------------------------------------------------------------- + implicit none - ! arguments ------------------------------------------------------------------ + integer (kind=int_kind), intent(in) :: NA_len real (kind=dbl_kind), intent(in) :: rhow logical(kind=log_kind),intent(in), dimension(:) :: skipme @@ -799,7 +850,9 @@ subroutine stepu_last(NA_len, rhow, & real (kind=dbl_kind), parameter :: & cosw = c1 , & ! cos(ocean turning angle) ! turning angle = 0 sinw = c0 - ! local variables + + !-- local variables + integer (kind=int_kind) :: iw,il,iu real (kind=dbl_kind) :: uold, vold, vrel,cca,ccb,ab2,cc1,cc2,taux,tauy,Cb real (kind=dbl_kind) :: tmp_str2_nw,tmp_str3_se,tmp_str4_sw @@ -807,6 +860,9 @@ subroutine stepu_last(NA_len, rhow, & real (kind=dbl_kind) :: waterx,watery real (kind=dbl_kind) :: u0 = 5.e-5_dbl_kind ! residual velocity for basal stress (m/s) + character(len=*), parameter :: subname = '(stepu_last)' + !--------------------------------------- + #ifdef _OPENACC !$acc parallel & !$acc present(Cw,aiu,uocn,vocn,forcex,forcey,umassdti,fm,uarear,Tbu, & @@ -857,22 +913,30 @@ subroutine stepu_last(NA_len, rhow, & #ifdef _OPENACC !$acc end parallel #endif + end subroutine stepu_last - subroutine halo_update(NAVEL_len,lb,ub,uvel,vvel, halo_parent) - !- modules ------------------------------------------------------------------- +!---------------------------------------------------------------------------- + + subroutine evp1d_halo_update(NAVEL_len,lb,ub,uvel,vvel, halo_parent) + use ice_kinds_mod use dmi_omp, only : domp_get_domain - !- directives ---------------------------------------------------------------- + implicit none - ! arguments ------------------------------------------------------------------ + integer (kind=int_kind), intent(in) :: NAVEL_len integer(kind=int_kind),intent(in) :: lb,ub integer(kind=int_kind),dimension(:), intent(in), contiguous :: halo_parent real(kind=dbl_kind),dimension(:), intent(inout), contiguous :: uvel,vvel - ! local variables + + !-- local variables + integer (kind=int_kind) :: iw,il,iu + character(len=*), parameter :: subname = '(evp1d_halo_update)' + !--------------------------------------- + #ifdef _OPENACC !$acc parallel & !$acc present(uvel,vvel) & @@ -889,20 +953,30 @@ subroutine halo_update(NAVEL_len,lb,ub,uvel,vvel, halo_parent) #ifdef _OPENACC !$acc end parallel #endif - end subroutine halo_update + + end subroutine evp1d_halo_update + +!---------------------------------------------------------------------------- end module bench_v2 +!=============================================================================== !=============================================================================== !-- One dimension representation of EVP 2D arrays used for EVP kernels module ice_dyn_evp_1d + use ice_kinds_mod + use ice_fileunits, only: nu_diag use ice_exit, only: abort_ice !-- BEGIN: specific for the KERNEL use ice_dyn_shared, only: revp, ecci, denom1, arlx1i, brlx !-- END: specific for the KERNEL + implicit none + private + public :: evp_copyin, evp_copyout, evp_kernel_v2 + interface evp_copyin ! module procedure evp_copyin_v1 module procedure evp_copyin_v2 @@ -911,9 +985,7 @@ module ice_dyn_evp_1d ! module procedure convert_2d_1d_v1 module procedure convert_2d_1d_v2 end interface - public :: evp_copyin, evp_copyout, evp_kernel_v2 - private - save + integer(kind=int_kind) :: & NA_len, NAVEL_len logical(kind=log_kind), dimension(:), allocatable :: & @@ -936,11 +1008,21 @@ module ice_dyn_evp_1d real (kind=dbl_kind), dimension(:), allocatable :: & HTE,HTN, & HTEm1,HTNm1 + contains + +!---------------------------------------------------------------------------- + subroutine alloc1d(na) + implicit none + integer(kind=int_kind),intent(in) :: na integer(kind=int_kind) :: ierr,nb + + character(len=*), parameter :: subname = '(alloc1d)' + !--------------------------------------- + nb=na allocate( & ! U+T cells @@ -968,22 +1050,43 @@ subroutine alloc1d(na) uvel_init(1:nb),vvel_init(1:nb), & taubx(1:nb),tauby(1:nb), & stat=ierr) - if (ierr/=0) call abort_ice('(ice_dyn_evp_1d) : Error allocating 1D') + + if (ierr/=0) call abort_ice(subname//': ERROR allocating 1D') + end subroutine alloc1d + +!---------------------------------------------------------------------------- + subroutine alloc1d_navel(navel) + implicit none + integer(kind=int_kind),intent(in) :: navel integer(kind=int_kind) :: ierr + + character(len=*), parameter :: subname = '(alloc1d_navel)' + !--------------------------------------- + allocate( & uvel(1:navel),vvel(1:navel), indij(1:navel), halo_parent(1:navel), & str1(1:navel),str2(1:navel),str3(1:navel),str4(1:navel), & str5(1:navel),str6(1:navel),str7(1:navel),str8(1:navel), & stat=ierr) - if (ierr/=0) call abort_ice('(ice_dyn_evp_1d) : Error allocating 1D navel') + if (ierr/=0) call abort_ice(subname// ': Error allocating 1D navel') + end subroutine alloc1d_navel + +!---------------------------------------------------------------------------- + subroutine dealloc1d + implicit none + integer(kind=int_kind) :: ierr + + character(len=*), parameter :: subname = '(dealloc1d)' + !--------------------------------------- + deallocate( & ! U+T cells ! Helper index for neighbours @@ -1009,24 +1112,29 @@ subroutine dealloc1d ! NAVEL uvel,vvel, indij, halo_parent, & stat=ierr) - if (ierr/=0) call abort_ice('(ice_dyn_evp_1d) : Error de-allocating 1D') + + if (ierr/=0) call abort_ice(subname//': Error de-allocating 1D') + !v1 if (allocated(tinyarea)) then !v1 deallocate( & !v1 dxhy,dyhx,cyp,cxp,cym,cxm,tinyarea, & !v1 waterx,watery, & !v1 stat=ierr) -!v1 if (ierr/=0) call abort_ice('(ice_dyn_evp_1d) : Error de-allocating 1D, v1') +!v1 if (ierr/=0) call abort_ice(subname//': Error de-allocating 1D, v1') !v1 endif + if (allocated(HTE)) then deallocate( & ! Grid distances: HTE,HTN + "-1 neighbours" HTE,HTN, HTEm1,HTNm1, & stat=ierr) - if (ierr/=0) call abort_ice('(ice_dyn_evp_1d) : Error de-allocating 1D, v2') + if (ierr/=0) call abort_ice(subname//': Error de-allocating 1D, v2') endif + end subroutine dealloc1d -!=============================================================================== -!=============================================================================== + +!---------------------------------------------------------------------------- + subroutine evp_copyin_v2(nx,ny,nblk,nx_glob,ny_glob, & I_HTE,I_HTN, & !v1 I_dxhy,I_dyhx,I_cyp,I_cxp,I_cym,I_cxm,I_tinyarea, & @@ -1038,11 +1146,14 @@ subroutine evp_copyin_v2(nx,ny,nblk,nx_glob,ny_glob, I_stressp_1 ,I_stressp_2, I_stressp_3, I_stressp_4, & I_stressm_1 ,I_stressm_2, I_stressm_3, I_stressm_4, & I_stress12_1,I_stress12_2,I_stress12_3,I_stress12_4 ) + use ice_gather_scatter, only: gather_global_ext use ice_domain, only: distrb_info use ice_communicate, only: my_task, master_task use ice_constants, only: c0,c1,p5 + implicit none + integer(int_kind), intent(in) :: nx, ny, nblk, nx_glob, ny_glob integer (kind=int_kind),dimension (nx,ny,nblk), intent(in) :: I_icetmask logical (kind=log_kind),dimension (nx,ny,nblk), intent(in) :: I_iceumask @@ -1056,7 +1167,9 @@ subroutine evp_copyin_v2(nx,ny,nblk,nx_glob,ny_glob, I_stressp_1, I_stressp_2, I_stressp_3, I_stressp_4, & I_stressm_1, I_stressm_2, I_stressm_3, I_stressm_4, & I_stress12_1,I_stress12_2,I_stress12_3,I_stress12_4 - ! local variables + + !-- local variables + integer (kind=int_kind),dimension (nx_glob,ny_glob) :: G_icetmask logical (kind=log_kind),dimension (nx_glob,ny_glob) :: G_iceumask real (kind=dbl_kind), dimension(nx_glob,ny_glob) :: & @@ -1070,7 +1183,11 @@ subroutine evp_copyin_v2(nx,ny,nblk,nx_glob,ny_glob, G_stressm_1, G_stressm_2, G_stressm_3, G_stressm_4, & G_stress12_1,G_stress12_2,G_stress12_3,G_stress12_4 integer(kind=int_kind) :: na, navel + + character(len=*), parameter :: subname = '(evp_copyin_v2)' + !--------------------------------------- !-- Gather data into one single block -- + call gather_global_ext(G_icetmask, I_icetmask, master_task, distrb_info) call gather_global_ext(G_iceumask, I_iceumask, master_task, distrb_info) call gather_global_ext(G_HTE, I_HTE, master_task, distrb_info) @@ -1116,9 +1233,9 @@ subroutine evp_copyin_v2(nx,ny,nblk,nx_glob,ny_glob, call gather_global_ext(G_stress12_2, I_stress12_2, master_task, distrb_info) call gather_global_ext(G_stress12_3, I_stress12_3, master_task, distrb_info) call gather_global_ext(G_stress12_4, I_stress12_4, master_task, distrb_info) - ! END: Gather data !-- All calculations has to be done on the master-task -- + if (my_task == master_task) then !-- Find number of active points and allocate vectors -- call calc_na(nx_glob,ny_glob,na,G_icetmask) @@ -1144,32 +1261,39 @@ subroutine evp_copyin_v2(nx,ny,nblk,nx_glob,ny_glob, NA_len=na NAVEL_len=navel endif - !-- write check + + !-- write check !if (1 == 1) then -! write(*,*)'MHRI: INDICES start: evp-copyin' -! write(*,*) 'na,navel ', na,navel -! write(*,*) 'Min/max ee', minval(ee(1:na)), maxval(ee(1:na)) -! write(*,*) 'Min/max ne', minval(ne(1:na)), maxval(ne(1:na)) -! write(*,*) 'Min/max se', minval(se(1:na)), maxval(se(1:na)) -! write(*,*) 'Min/max nw', minval(nw(1:na)), maxval(nw(1:na)) -! write(*,*) 'Min/max sw', minval(sw(1:na)), maxval(sw(1:na)) -! write(*,*) 'Min/max sse', minval(sse(1:na)), maxval(sse(1:na)) -! write(*,*)'MHRI: INDICES end: evp-copyin' +! write(nu_diag,*) subname,' MHRI: INDICES start:' +! write(nu_diag,*) 'na,navel ', na,navel +! write(nu_diag,*) 'Min/max ee', minval(ee(1:na)), maxval(ee(1:na)) +! write(nu_diag,*) 'Min/max ne', minval(ne(1:na)), maxval(ne(1:na)) +! write(nu_diag,*) 'Min/max se', minval(se(1:na)), maxval(se(1:na)) +! write(nu_diag,*) 'Min/max nw', minval(nw(1:na)), maxval(nw(1:na)) +! write(nu_diag,*) 'Min/max sw', minval(sw(1:na)), maxval(sw(1:na)) +! write(nu_diag,*) 'Min/max sse', minval(sse(1:na)), maxval(sse(1:na)) +! write(nu_diag,*) subname,' MHRI: INDICES end:' !endif + end subroutine evp_copyin_v2 - !=============================================================================== + +!---------------------------------------------------------------------------- + subroutine evp_copyout(nx,ny,nblk,nx_glob,ny_glob, & I_uvel,I_vvel, I_strintx,I_strinty, & I_stressp_1, I_stressp_2, I_stressp_3, I_stressp_4, & I_stressm_1, I_stressm_2, I_stressm_3, I_stressm_4, & I_stress12_1,I_stress12_2,I_stress12_3,I_stress12_4, & I_divu,I_rdg_conv,I_rdg_shear,I_shear,I_taubx,I_tauby ) + use ice_constants, only : c0, field_loc_center, field_loc_NEcorner, & field_type_scalar, field_type_vector use ice_gather_scatter, only: scatter_global_ext, scatter_global use ice_domain, only: distrb_info use ice_communicate, only: my_task, master_task + implicit none + integer(int_kind), intent(in) :: nx,ny,nblk, nx_glob,ny_glob real(dbl_kind), dimension(nx,ny,nblk), intent(out) :: & I_uvel,I_vvel, I_strintx,I_strinty, & @@ -1177,7 +1301,9 @@ subroutine evp_copyout(nx,ny,nblk,nx_glob,ny_glob, & I_stressm_1, I_stressm_2, I_stressm_3, I_stressm_4, & I_stress12_1,I_stress12_2,I_stress12_3,I_stress12_4, & I_divu,I_rdg_conv, I_rdg_shear,I_shear, I_taubx,I_tauby - ! local variables + + !-- local variables + real(dbl_kind), dimension(nx_glob,ny_glob) :: & G_uvel,G_vvel, G_strintx,G_strinty, & G_stressp_1, G_stressp_2, G_stressp_3, G_stressp_4, & @@ -1185,8 +1311,12 @@ subroutine evp_copyout(nx,ny,nblk,nx_glob,ny_glob, & G_stress12_1,G_stress12_2,G_stress12_3,G_stress12_4, & G_divu,G_rdg_conv, G_rdg_shear,G_shear, G_taubx,G_tauby integer(int_kind) :: i,j,iw, nx_block + + character(len=*), parameter :: subname = '(evp_copyout)' + !--------------------------------------- ! Remap 1d to 2d and fill in nx_block=nx_glob ! Total block size in x-dir + if (my_task == master_task) then G_uvel = c0 G_vvel = c0 @@ -1251,7 +1381,7 @@ subroutine evp_copyout(nx,ny,nblk,nx_glob,ny_glob, & !-- Scatter data into blocks -- !-- has to be done on all tasks -- - ! BEGIN: Scatter data + call scatter_global_ext(I_uvel, G_uvel, master_task, distrb_info) call scatter_global_ext(I_vvel, G_vvel, master_task, distrb_info) call scatter_global_ext(I_strintx, G_strintx, master_task, distrb_info) @@ -1274,22 +1404,31 @@ subroutine evp_copyout(nx,ny,nblk,nx_glob,ny_glob, & call scatter_global_ext(I_shear, G_shear, master_task, distrb_info) call scatter_global_ext(I_taubx, G_taubx, master_task, distrb_info) call scatter_global_ext(I_tauby, G_tauby, master_task, distrb_info) + end subroutine evp_copyout - !=============================================================================== + +!---------------------------------------------------------------------------- + subroutine evp_kernel_v2 + use ice_constants, only : c0 use ice_dyn_shared, only: ndte - use bench_v2, only : stress, stepu, halo_update + use bench_v2, only : evp1d_stress, evp1d_stepu, evp1d_halo_update use dmi_omp, only : domp_init use icepack_intfc, only: icepack_query_parameters use ice_communicate, only: my_task, master_task implicit none + real(kind=dbl_kind) :: rhow integer (kind=int_kind) :: ierr, lun, i, nthreads integer (kind=int_kind) :: na,nb,navel + character(len=*), parameter :: subname = '(evp_kernel_v2)' + !--------------------------------------- !-- All calculations has to be done on one single node (choose master-task) -- + if (my_task == master_task) then + !- Read constants... call icepack_query_parameters(rhow_out=rhow) na=NA_len @@ -1309,10 +1448,11 @@ subroutine evp_kernel_v2 str7=c0 str8=c0 - if (ndte<2) STOP 'ndte must be 2 or higher for this kernel' + if (ndte<2) call abort_ice(subname//' ERROR: ndte must be 2 or higher for this kernel') + !$OMP PARALLEL PRIVATE(i) do i = 1, ndte-1 - call stress (NA_len, & + call evp1d_stress(NA_len, & ee,ne,se,1,na,uvel,vvel,dxt,dyt, & hte,htn,htem1,htnm1, & strength,stressp_1,stressp_2,stressp_3,stressp_4, & @@ -1320,15 +1460,16 @@ subroutine evp_kernel_v2 stress12_2,stress12_3,stress12_4,str1,str2,str3, & str4,str5,str6,str7,str8) !$OMP BARRIER - call stepu(NA_len, rhow, & + call evp1d_stepu(NA_len, rhow, & 1,nb,cdn_ocn,aiu,uocn,vocn,forcex,forcey,umassdti,fm,uarear,Tbu,& uvel_init,vvel_init,uvel,vvel, & str1,str2,str3,str4,str5,str6,str7,str8, nw,sw,sse,skipucell) !$OMP BARRIER - call halo_update(NA_len,1,navel,uvel,vvel, halo_parent) + call evp1d_halo_update(NA_len,1,navel,uvel,vvel, halo_parent) !$OMP BARRIER enddo - call stress (NA_len, tarear, & + + call evp1d_stress(NA_len, tarear, & ee,ne,se,1,na,uvel,vvel,dxt,dyt, & hte,htn,htem1,htnm1, & strength,stressp_1,stressp_2,stressp_3,stressp_4, & @@ -1337,25 +1478,35 @@ subroutine evp_kernel_v2 divu,rdg_conv,rdg_shear,shear, & str1,str2,str3,str4,str5,str6,str7,str8) !$OMP BARRIER - call stepu (NA_len, rhow, & + call evp1d_stepu(NA_len, rhow, & 1,nb,cdn_ocn,aiu,uocn,vocn,forcex,forcey,umassdti,fm,uarear,Tbu,& strintx,strinty,taubx,tauby, & uvel_init,vvel_init,uvel,vvel, & str1,str2,str3,str4,str5,str6,str7,str8, nw,sw,sse,skipucell) !$OMP BARRIER - call halo_update(NA_len,1,navel,uvel,vvel, halo_parent) + call evp1d_halo_update(NA_len,1,navel,uvel,vvel, halo_parent) !$OMP END PARALLEL + endif + end subroutine evp_kernel_v2 - !=============================================================================== + +!---------------------------------------------------------------------------- + subroutine calc_na(nx,ny,na,icetmask) ! Calculate number of active points (na) use ice_blocks, only: nghost + implicit none + integer(int_kind),intent(in) :: nx,ny integer(int_kind),intent(out) :: na integer (kind=int_kind),dimension (nx,ny), intent(in) :: icetmask integer(int_kind) :: i,j + + character(len=*), parameter :: subname = '(calc_na)' + !--------------------------------------- + na = 0 ! Note: The icellt mask includes north and east ghost cells. (ice_dyn_shared.F90) do j = 1+nghost, ny ! -nghost @@ -1365,14 +1516,25 @@ subroutine calc_na(nx,ny,na,icetmask) endif enddo enddo + end subroutine calc_na + +!---------------------------------------------------------------------------- + subroutine calc_2d_indices(nx,ny,na,icetmask,iceumask) + use ice_blocks, only: nghost + implicit none + integer(int_kind),intent(in) :: nx,ny,na integer (kind=int_kind),dimension (nx,ny), intent(in) :: icetmask logical (kind=log_kind),dimension (nx,ny), intent(in) :: iceumask integer(int_kind) :: i,j,Nmaskt + + character(len=*), parameter :: subname = '(calc_2d_indices)' + !--------------------------------------- + skipucell(:)=.false. indi=0 indj=0 @@ -1392,21 +1554,32 @@ subroutine calc_2d_indices(nx,ny,na,icetmask,iceumask) enddo enddo if (Nmaskt.ne.na) then - write(*,*)'Nmaskt,na: ',Nmaskt,na - call abort_ice('(ice_dyn_evp_1d) : Problem Nmaskt != na') + write(nu_diag,*) subname,' Nmaskt,na: ',Nmaskt,na + call abort_ice(subname//': ERROR Problem Nmaskt != na') endif if (Nmaskt==0) then - write(*,*)'WARNING: NO ICE' + write(nu_diag,*) subname,' WARNING: NO ICE' endif + end subroutine calc_2d_indices + +!---------------------------------------------------------------------------- + subroutine calc_navel(nx_block,ny_block,na,navel) ! Calculate number of active points including needed halo points (navel) + implicit none + integer(int_kind),intent(in) :: nx_block,ny_block,na integer(int_kind),intent(out) :: navel + integer(int_kind) :: iw,i,j integer(int_kind),dimension(1:na) :: Iin,Iee,Ine,Ise,Inw,Isw,Isse integer(int_kind),dimension(1:7*na) :: util1,util2 + + character(len=*), parameter :: subname = '(calc_navel)' + + !--------------------------------------- ! Additional indices used for finite differences (FD) do iw=1,na i=indi(iw) @@ -1419,6 +1592,7 @@ subroutine calc_navel(nx_block,ny_block,na,navel) Isw(iw) = i+1 + (j-0)*nx_block ! (+1,+1) Isse(iw)= i + (j-0)*nx_block ! ( 0,+1) enddo + !-- Find number of points needed for finite difference calculations call union(Iin, Iee,na,na,util1,i) call union(util1,Ine, i,na,util2,j) @@ -1426,15 +1600,20 @@ subroutine calc_navel(nx_block,ny_block,na,navel) call union(util1,Inw, i,na,util2,j) call union(util2,Isw, j,na,util1,i) call union(util1,Isse,i,na,util2,navel) + !-- Check bounds do iw=1,navel if (util2(iw)>nx_block*ny_block .or. util2(iw)<1) then - write(*,*)'nx_block,ny_block,nx_block*ny_block: ',nx_block,ny_block,nx_block*ny_block - write(*,*)'na,navel,iw,util2(iw): ',na,navel,iw,util2(iw) - call abort_ice('(ice_dyn_evp_1d) : Problem with boundary. Check halo zone values') + write(nu_diag,*) subname,' nx_block,ny_block,nx_block*ny_block: ',nx_block,ny_block,nx_block*ny_block + write(nu_diag,*) subname,' na,navel,iw,util2(iw): ',na,navel,iw,util2(iw) + call abort_ice(subname//': Problem with boundary. Check halo zone values') endif enddo + end subroutine calc_navel + +!---------------------------------------------------------------------------- + subroutine convert_2d_1d_v2(nx,ny, na,navel, & I_HTE,I_HTN, & !v1 I_dxhy,I_dyhx,I_cyp,I_cxp,I_cym,I_cxm,I_tinyarea, & @@ -1446,7 +1625,9 @@ subroutine convert_2d_1d_v2(nx,ny, na,navel, & I_stressp_1 ,I_stressp_2, I_stressp_3, I_stressp_4, & I_stressm_1 ,I_stressm_2, I_stressm_3, I_stressm_4, & I_stress12_1,I_stress12_2,I_stress12_3,I_stress12_4 ) + implicit none + integer(int_kind),intent(in) :: nx,ny,na,navel real (kind=dbl_kind), dimension(nx,ny), intent(in) :: & I_HTE,I_HTN, & @@ -1459,10 +1640,15 @@ subroutine convert_2d_1d_v2(nx,ny, na,navel, & I_stressp_1 ,I_stressp_2, I_stressp_3, I_stressp_4, & I_stressm_1 ,I_stressm_2, I_stressm_3, I_stressm_4, & I_stress12_1,I_stress12_2,I_stress12_3,I_stress12_4 + integer(int_kind) :: iw,i,j, nx_block integer(int_kind),dimension(1:na) :: Iin,Iee,Ine,Ise,Inw,Isw,Isse integer(int_kind),dimension(1:7*na) :: util1,util2 integer(int_kind) :: nachk + + character(len=*), parameter :: subname = '(convert_2d_1d_v2)' + + !--------------------------------------- ! Additional indices used for finite differences (FD) nx_block=nx ! Total block size in x-dir do iw=1,na @@ -1476,6 +1662,7 @@ subroutine convert_2d_1d_v2(nx,ny, na,navel, & Isw(iw) = i+1 + (j-0)*nx_block ! (+1,+1) Isse(iw)= i + (j-0)*nx_block ! ( 0,+1) enddo + !-- Find number of points needed for finite difference calculations call union(Iin, Iee,na,na,util1,i) call union(util1,Ine, i,na,util2,j) @@ -1483,15 +1670,17 @@ subroutine convert_2d_1d_v2(nx,ny, na,navel, & call union(util1,Inw, i,na,util2,j) call union(util2,Isw, j,na,util1,i) call union(util1,Isse,i,na,util2,nachk) + if (nachk .ne. navel) then - write(*,*)'ERROR: navel badly chosen: na,navel,nachk = ',na,navel,nachk - call abort_ice('(ice_dyn_evp_1d) : ERROR: navel badly chosen') + write(nu_diag,*) subname,' ERROR: navel badly chosen: na,navel,nachk = ',na,navel,nachk + call abort_ice(subname//': ERROR: navel badly chosen') endif ! indij: vector with target points (sorted) ... do iw=1,na indij(iw)=Iin(iw) enddo + ! indij: ... followed by extra points (sorted) call setdiff(util2,Iin,navel,na,util1,j) do iw=na+1,navel @@ -1508,14 +1697,14 @@ subroutine convert_2d_1d_v2(nx,ny, na,navel, & !-- write check !if (1 == 2) then -! write(*,*)'MHRI: INDICES start: convert_2d_1d_v2' -! write(*,*) 'Min/max ee', minval(ee), maxval(ee) -! write(*,*) 'Min/max ne', minval(ne), maxval(ne) -! write(*,*) 'Min/max se', minval(se), maxval(se) -! write(*,*) 'Min/max nw', minval(nw), maxval(nw) -! write(*,*) 'Min/max sw', minval(sw), maxval(sw) -! write(*,*) 'Min/max sse',minval(sse),maxval(sse) -! write(*,*)'MHRI: INDICES end: convert_2d_1d_v2' +! write(nu_diag,*) subname,' MHRI: INDICES start:' +! write(nu_diag,*) 'Min/max ee', minval(ee), maxval(ee) +! write(nu_diag,*) 'Min/max ne', minval(ne), maxval(ne) +! write(nu_diag,*) 'Min/max se', minval(se), maxval(se) +! write(nu_diag,*) 'Min/max nw', minval(nw), maxval(nw) +! write(nu_diag,*) 'Min/max sw', minval(sw), maxval(sw) +! write(nu_diag,*) 'Min/max sse',minval(sse),maxval(sse) +! write(nu_diag,*) subname,' MHRI: INDICES end:' !endif ! Write 1D data from 2D: Here only extra FD part, the rest follows... @@ -1580,20 +1769,33 @@ subroutine convert_2d_1d_v2(nx,ny, na,navel, & HTNm1(iw) = I_HTN(i,j-1) enddo !$OMP END PARALLEL DO + end subroutine convert_2d_1d_v2 + +!---------------------------------------------------------------------------- + subroutine calc_halo_parent(nx,ny,na,navel, I_icetmask) + implicit none + integer(kind=int_kind),intent(in) :: nx,ny,na,navel integer(kind=int_kind), dimension(nx,ny), intent(in) :: I_icetmask + integer(kind=int_kind) :: iw,i,j !,masku,maskt integer(kind=int_kind),dimension(1:navel) :: Ihalo + + character(len=*), parameter :: subname = '(calc_halo_parent)' + + !--------------------------------------- ! Indices for halo update: ! 0: no halo point ! >0: index for halo point parent. Finally related to indij vector ! TODO: ONLY for nghost==1 ! TODO: ONLY for circular grids - NOT tripole grids + Ihalo(:)=0 halo_parent(:)=0 + !$OMP PARALLEL DO PRIVATE(iw,i,j) do iw=1,navel j=int((indij(iw)-1)/(nx))+1 @@ -1605,13 +1807,14 @@ subroutine calc_halo_parent(nx,ny,na,navel, I_icetmask) if (j==1 .and. I_icetmask(i,ny-1)==1) Ihalo(iw)= i+(ny-2)*nx enddo !$OMP END PARALLEL DO + ! Relate halo indices to indij vector call findXinY_halo(Ihalo,indij,navel,navel,halo_parent) !-- write check !if (1 == 1) then ! integer(kind=int_kind) :: iiw,ii,jj !,masku,maskt MHRI -! write(*,*)'MHRI: halo boundary start: calc_halo_parent ' +! write(nu_diag,*) subname,' MHRI: halo boundary start:' ! do iw=1,navel ! if (halo_parent(iw)>0) then ! iiw=halo_parent(iw) @@ -1621,23 +1824,33 @@ subroutine calc_halo_parent(nx,ny,na,navel, I_icetmask) ! jj=j ! j=int((indij(iw)-1)/(nx))+1 ! i=indij(iw)-(j-1)*nx -! write(*,*)iw,i,j,iiw,ii,jj +! write(nu_diag,*)iw,i,j,iiw,ii,jj ! endif ! enddo -! write(*,*)'MHRI: halo boundary end: calc_halo_parent ' +! write(nu_diag,*) subname,' MHRI: halo boundary end:' !endif + end subroutine calc_halo_parent - !======================================================================= + +!---------------------------------------------------------------------------- + subroutine union(x,y,nx,ny,xy,nxy) ! Find union (xy) of two sorted integer vectors (x and y) ! ie. Combined values of the two vectors with no repetitions. !use ice_kinds_mod + implicit none + integer (int_kind) :: i,j,k integer (int_kind),intent(in) :: nx,ny integer (int_kind),intent(in) :: x(1:nx),y(1:ny) integer (int_kind),intent(out) :: xy(1:nx+ny) integer (int_kind),intent(out) :: nxy + + character(len=*), parameter :: subname = '(union)' + + !--------------------------------------- + i=1 j=1 k=1 @@ -1655,6 +1868,7 @@ subroutine union(x,y,nx,ny,xy,nxy) endif k=k+1 enddo + ! The rest do while (i<=nx) xy(k)=x(i) @@ -1667,18 +1881,27 @@ subroutine union(x,y,nx,ny,xy,nxy) k=k+1 enddo nxy=k-1 + end subroutine union - !======================================================================= + +!---------------------------------------------------------------------------- + subroutine setdiff(x,y,nx,ny,xy,nxy) ! Find element (xy) of two sorted integer vectors (x and y) ! that are in x, but not in y ... or in y, but not in x !use ice_kinds_mod + implicit none + integer (int_kind) :: i,j,k integer (int_kind),intent(in) :: nx,ny integer (int_kind),intent(in) :: x(1:nx),y(1:ny) integer (int_kind),intent(out) :: xy(1:nx+ny) integer (int_kind),intent(out) :: nxy + + character(len=*), parameter :: subname = '(setdiff)' + !--------------------------------------- + i=1 j=1 k=1 @@ -1696,6 +1919,7 @@ subroutine setdiff(x,y,nx,ny,xy,nxy) j=j+1 endif enddo + ! The rest do while (i<=nx) xy(k)=x(i) @@ -1708,8 +1932,11 @@ subroutine setdiff(x,y,nx,ny,xy,nxy) k=k+1 enddo nxy=k-1 + end subroutine setdiff - !======================================================================= + +!---------------------------------------------------------------------------- + subroutine findXinY(x,y,nx,ny,indx) ! Find indx vector so that x(1:na)=y(indx(1:na)) ! @@ -1722,11 +1949,17 @@ subroutine findXinY(x,y,nx,ny,indx) ! Return: indx(1:na) ! !use ice_kinds_mod + implicit none + integer (int_kind),intent(in) :: nx,ny integer (int_kind),intent(in) :: x(1:nx),y(1:ny) integer (int_kind),intent(out) :: indx(1:nx) integer (int_kind) :: i,j1,j2 + + character(len=*), parameter :: subname = '(findXinY)' + !--------------------------------------- + i=1 j1=1 j2=nx+1 @@ -1744,13 +1977,17 @@ subroutine findXinY(x,y,nx,ny,indx) else if (x(i)>y(j2) ) then !.and. j22) then ! Stop for inf. loop. This check should not be necessary for halo - write(*,*)'nx,ny: ',nx,ny - write(*,*)'i,j1: ',i,j1 - write(*,*)'x(i),y(j1): ',x(i),y(j1) - call abort_ice('(ice_dyn_evp_1d) : ERROR in findXinY_halo: too many loops') + write(nu_diag,*) subname,' nx,ny: ',nx,ny + write(nu_diag,*) subname,' i,j1: ',i,j1 + write(nu_diag,*) subname,' x(i),y(j1): ',x(i),y(j1) + call abort_ice(subname//': ERROR too many loops') endif endif endif end do + end subroutine findXinY_halo - !======================================================================= +!---------------------------------------------------------------------------- + subroutine numainit(l,u,uu) - !- modules ----------------------------------------------------------------- + use dmi_omp, only : domp_get_domain use ice_constants, only: c0 + implicit none + integer(kind=int_kind),intent(in) :: l,u,uu + integer(kind=int_kind) :: lo,up + + character(len=*), parameter :: subname = '(numainit)' + !--------------------------------------- + call domp_get_domain(l,u,lo,up) ee(lo:up)=0 ne(lo:up)=0 @@ -1877,8 +2129,10 @@ subroutine numainit(l,u,uu) str6(lo:up)=c0 str7(lo:up)=c0 str8(lo:up)=c0 + end subroutine numainit - !======================================================================= +!---------------------------------------------------------------------------- + end module ice_dyn_evp_1d diff --git a/cicecore/cicedynB/dynamics/ice_dyn_shared.F90 b/cicecore/cicedynB/dynamics/ice_dyn_shared.F90 index dc929ddee..c26e36d96 100644 --- a/cicecore/cicedynB/dynamics/ice_dyn_shared.F90 +++ b/cicecore/cicedynB/dynamics/ice_dyn_shared.F90 @@ -40,10 +40,10 @@ module ice_dyn_shared revised_evp ! if true, use revised evp procedure integer (kind=int_kind), public :: & - evp_kernel_ver ! 0 = 2D org version - ! 1 = 1D representation raw (not implemented) - ! 2 = 1D + calculate distances inline (implemented) - ! 3 = 1D + calculate distances inline + real*4 internal (not implemented yet) + kevp_kernel ! 0 = 2D org version + ! 1 = 1D representation raw (not implemented) + ! 2 = 1D + calculate distances inline (implemented) + ! 3 = 1D + calculate distances inline + real*4 internal (not implemented yet) ! other EVP parameters character (len=char_len), public :: & @@ -503,7 +503,7 @@ subroutine dyn_prep2 (nx_block, ny_block, & taubx (i,j) = c0 tauby (i,j) = c0 - if (revp==1) then ! revised evp + if (icetmask(i,j)==0) then stressp_1 (i,j) = c0 stressp_2 (i,j) = c0 stressp_3 (i,j) = c0 @@ -516,20 +516,7 @@ subroutine dyn_prep2 (nx_block, ny_block, & stress12_2(i,j) = c0 stress12_3(i,j) = c0 stress12_4(i,j) = c0 - else if (icetmask(i,j)==0) then ! classic evp - stressp_1 (i,j) = c0 - stressp_2 (i,j) = c0 - stressp_3 (i,j) = c0 - stressp_4 (i,j) = c0 - stressm_1 (i,j) = c0 - stressm_2 (i,j) = c0 - stressm_3 (i,j) = c0 - stressm_4 (i,j) = c0 - stress12_1(i,j) = c0 - stress12_2(i,j) = c0 - stress12_3(i,j) = c0 - stress12_4(i,j) = c0 - endif ! revp + endif enddo ! i enddo ! j @@ -913,8 +900,10 @@ subroutine basal_stress_coeff (nx_block, ny_block, & hu, & ! volume per unit area of ice at u location (mean thickness) hwu, & ! water depth at u location hcu, & ! critical thickness at u location - k2 = 15.0_dbl_kind , & ! second free parameter (N/m^3) for landfast parametrization - alphab = 20.0_dbl_kind ! alphab=Cb factor in Lemieux et al 2015 + k2 = 15.0_dbl_kind , & ! second free parameter (N/m^3) for landfast parametrization + alphab = 20.0_dbl_kind, & ! alphab=Cb factor in Lemieux et al 2015 + threshold_hw = 30.0_dbl_kind ! max water depth for grounding + ! see keel data from Amundrud et al. 2004 (JGR) integer (kind=int_kind) :: & i, j, ij @@ -926,15 +915,21 @@ subroutine basal_stress_coeff (nx_block, ny_block, & j = indxuj(ij) ! convert quantities to u-location - au = max(aice(i,j),aice(i+1,j),aice(i,j+1),aice(i+1,j+1)) + hwu = min(hwater(i,j),hwater(i+1,j),hwater(i,j+1),hwater(i+1,j+1)) - hu = max(vice(i,j),vice(i+1,j),vice(i,j+1),vice(i+1,j+1)) - ! 1- calculate critical thickness - hcu = au * hwu / k1 + if (hwu < threshold_hw) then + + au = max(aice(i,j),aice(i+1,j),aice(i,j+1),aice(i+1,j+1)) + hu = max(vice(i,j),vice(i+1,j),vice(i,j+1),vice(i+1,j+1)) + + ! 1- calculate critical thickness + hcu = au * hwu / k1 - ! 2- calculate basal stress factor - Tbu(i,j) = k2 * max(c0,(hu - hcu)) * exp(-alphab * (c1 - au)) + ! 2- calculate basal stress factor + Tbu(i,j) = k2 * max(c0,(hu - hcu)) * exp(-alphab * (c1 - au)) + + endif enddo ! ij diff --git a/cicecore/cicedynB/general/ice_flux.F90 b/cicecore/cicedynB/general/ice_flux.F90 index d162ec468..29ee2d58f 100644 --- a/cicecore/cicedynB/general/ice_flux.F90 +++ b/cicecore/cicedynB/general/ice_flux.F90 @@ -709,6 +709,11 @@ subroutine init_coupler_flux + vatm(:,:,:)**2) ! wind speed, (m/s) Cdn_atm(:,:,:) = (vonkar/log(zref/iceruf)) & * (vonkar/log(zref/iceruf)) ! atmo drag for RASM + alvdr_init(:,:,:) = c0 + alidr_init(:,:,:) = c0 + alvdf_init(:,:,:) = c0 + alidf_init(:,:,:) = c0 + end subroutine init_coupler_flux diff --git a/cicecore/cicedynB/general/ice_forcing.F90 b/cicecore/cicedynB/general/ice_forcing.F90 index eb222dd9e..4baa88fff 100644 --- a/cicecore/cicedynB/general/ice_forcing.F90 +++ b/cicecore/cicedynB/general/ice_forcing.F90 @@ -4133,7 +4133,8 @@ subroutine hycom_atm_data recnum=min(recnum,Njday_atm-1) ! Check if current time do not exceed last forcing time - if ( hcdate>jday_atm(recnum+1)+p001 ) then + ! + check forcing is available before (or at) current forcing time + if ( hcdate>jday_atm(recnum+1)+p001 .or. hcdate0: 1D. Only ver. 2 is implemented yet) + kevp_kernel = 0 ! EVP kernel (0 = 2D, >0: 1D. Only ver. 2 is implemented yet) brlx = 300.0_dbl_kind ! revised_evp values. Otherwise overwritten in ice_dyn_shared arlx = 300.0_dbl_kind ! revised_evp values. Otherwise overwritten in ice_dyn_shared revised_evp = .false. ! if true, use revised procedure for evp dynamics @@ -546,7 +546,7 @@ subroutine input_data call broadcast_scalar(kdyn, master_task) call broadcast_scalar(ndtd, master_task) call broadcast_scalar(ndte, master_task) - call broadcast_scalar(evp_kernel_ver, master_task) + call broadcast_scalar(kevp_kernel, master_task) call broadcast_scalar(brlx, master_task) call broadcast_scalar(arlx, master_task) call broadcast_scalar(revised_evp, master_task) @@ -889,6 +889,18 @@ subroutine input_data if (my_task == master_task) write(nu_diag,*) subname//' ERROR: formdrag=F and fbot_xfer_type=Cdn_ocn' abort_flag = 19 endif + + if (.not.(trim(dumpfreq) == 'y' .or. trim(dumpfreq) == 'Y' .or. & + trim(dumpfreq) == 'm' .or. trim(dumpfreq) == 'M' .or. & + trim(dumpfreq) == 'd' .or. trim(dumpfreq) == 'D' .or. & + trim(dumpfreq) == 'h' .or. trim(dumpfreq) == 'H' .or. & + trim(dumpfreq) == '1' )) then + if (my_task == master_task) then + write(nu_diag,*) subname//' WARNING: unrecognized dumpfreq=', trim(dumpfreq) + write(nu_diag,*) subname//' WARNING: No restarts files will be written' + write(nu_diag,*) subname//' WARNING: Allowed values : ''y'', ''m'', ''d'', ''h'', ''1''' + endif + endif ice_IOUnitsMinUnit = numin ice_IOUnitsMaxUnit = numax @@ -921,7 +933,7 @@ subroutine input_data write(nu_diag,1020) ' diagfreq = ', diagfreq write(nu_diag,1010) ' print_global = ', print_global write(nu_diag,1010) ' print_points = ', print_points - write(nu_diag,1010) ' bfbflag = ', bfbflag + write(nu_diag,1030) ' bfbflag = ', bfbflag write(nu_diag,1020) ' numin = ', numin write(nu_diag,1020) ' numax = ', numax write(nu_diag,1050) ' histfreq = ', histfreq(:) @@ -984,10 +996,8 @@ subroutine input_data endif write(nu_diag,1020) ' ndtd = ', ndtd write(nu_diag,1020) ' ndte = ', ndte - write(nu_diag,1010) ' revised_evp = ', & - revised_evp - write(nu_diag,1020) ' evp_kernel_ver = ', & - evp_kernel_ver + write(nu_diag,1010) ' revised_evp = ', revised_evp + write(nu_diag,1020) ' kevp_kernel = ', kevp_kernel write(nu_diag,1005) ' brlx = ', brlx write(nu_diag,1005) ' arlx = ', arlx if (kdyn == 1) & @@ -1178,8 +1188,23 @@ subroutine input_data abort_flag = 20 endif + ! check for valid kevp_kernel + ! tcraig, kevp_kernel=2 is not validated, do not allow use + ! use "102" to test "2" for now + if (kevp_kernel /= 0) then + if (kevp_kernel == 102) then + kevp_kernel = 2 + else + if (my_task == master_task) write(nu_diag,*) subname//' ERROR: kevp_kernel = ',kevp_kernel + if (kevp_kernel == 2) then + if (my_task == master_task) write(nu_diag,*) subname//' kevp_kernel=2 not validated, use kevp_kernel=102 for testing until it is validated' + endif + abort_flag = 21 + endif + endif + if (abort_flag /= 0) then - call flush_fileunit(nu_diag) + call flush_fileunit(nu_diag) endif call ice_barrier() if (abort_flag /= 0) then diff --git a/cicecore/cicedynB/general/ice_step_mod.F90 b/cicecore/cicedynB/general/ice_step_mod.F90 index fec45cc8d..fb759cb27 100644 --- a/cicecore/cicedynB/general/ice_step_mod.F90 +++ b/cicecore/cicedynB/general/ice_step_mod.F90 @@ -98,10 +98,10 @@ subroutine prep_radiation (iblk) call ice_timer_start(timer_sw) ! shortwave - alvdr_init(:,:,:) = c0 - alvdf_init(:,:,:) = c0 - alidr_init(:,:,:) = c0 - alidf_init(:,:,:) = c0 + alvdr_init(:,:,iblk) = c0 + alvdf_init(:,:,iblk) = c0 + alidr_init(:,:,iblk) = c0 + alidf_init(:,:,iblk) = c0 this_block = get_block(blocks_ice(iblk),iblk) ilo = this_block%ilo diff --git a/cicecore/cicedynB/infrastructure/comm/mpi/ice_global_reductions.F90 b/cicecore/cicedynB/infrastructure/comm/mpi/ice_global_reductions.F90 index c315d99b9..1357bc2cf 100644 --- a/cicecore/cicedynB/infrastructure/comm/mpi/ice_global_reductions.F90 +++ b/cicecore/cicedynB/infrastructure/comm/mpi/ice_global_reductions.F90 @@ -10,13 +10,15 @@ module ice_global_reductions ! Feb. 2008: Updated from POP version by Elizabeth C. Hunke, LANL ! Aug. 2014: Added bit-for-bit reproducible options for global_sum_dbl ! and global_sum_prod_dbl by T Craig NCAR +! Mar. 2019: Refactored bit-for-bit option, T Craig use ice_kinds_mod use ice_blocks, only: block, get_block, nx_block, ny_block -#ifdef REPRODUCIBLE - use ice_blocks, only: nblocks_tot +#ifdef SERIAL_REMOVE_MPI + use ice_communicate, only: my_task, master_task +#else + use ice_communicate, only: my_task, mpiR16, mpiR8, mpiR4, master_task #endif - use ice_communicate, only: my_task, mpiR8, mpiR4, master_task use ice_constants, only: field_loc_Nface, field_loc_NEcorner use ice_fileunits, only: bfbflag use ice_exit, only: abort_ice @@ -25,11 +27,14 @@ module ice_global_reductions use ice_domain_size, only: nx_global, ny_global, max_blocks use ice_gather_scatter, only: gather_global use icepack_intfc, only: icepack_warnings_flush, icepack_warnings_aborted + use ice_reprosum, only: ice_reprosum_calc implicit none private +#ifndef SERIAL_REMOVE_MPI include 'mpif.h' +#endif public :: global_sum, & global_sum_prod, & @@ -115,29 +120,24 @@ function global_sum_dbl(array, dist, field_loc, mMask, lMask) & ! !----------------------------------------------------------------------- - real (dbl_kind), dimension(:), allocatable :: & - blockSum, &! sum of local block domain - globalSumTmp ! higher precision global sum - integer (int_kind) :: & i,j,iblock,n, &! local counters ib,ie,jb,je, &! beg,end of physical domain - ierr, &! mpi error flag blockID, &! block location numProcs, &! number of processor participating numBlocks, &! number of local blocks communicator, &! communicator for this distribution - nreduce, &! mpi count maxiglob ! maximum non-redundant value of i_global logical (log_kind) :: & Nrow ! this field is on a N row (a velocity row) real (dbl_kind), dimension(:,:), allocatable :: & - workg ! temporary global array - real (dbl_kind), dimension(:,:,:), allocatable :: & work ! temporary local array + real (dbl_kind), dimension(:), allocatable :: & + sums ! array of sums + type (block) :: & this_block ! holds local block information @@ -145,33 +145,21 @@ function global_sum_dbl(array, dist, field_loc, mMask, lMask) & !----------------------------------------------------------------------- - if (bfbflag) then - allocate(work(nx_block,ny_block,max_blocks)) - work = 0.0_dbl_kind - if (my_task == master_task) then - allocate(workg(nx_global,ny_global)) - else - allocate(workg(1,1)) - endif - workg = 0.0_dbl_kind - else -#ifdef REPRODUCIBLE - nreduce = nblocks_tot -#else - nreduce = 1 -#endif - allocate(blockSum(nreduce), & - globalSumTmp(nreduce)) - blockSum = 0.0_dbl_kind - globalSumTmp = 0.0_dbl_kind - globalSum = 0.0_dbl_kind - endif call ice_distributionGet(dist, & numLocalBlocks = numBlocks, & nprocs = numProcs, & communicator = communicator) + if (numBlocks == 0) then + allocate(work(1,1)) + else + allocate(work(nx_block*ny_block*numBlocks,1)) + endif + allocate(sums(1)) + work = 0.0_dbl_kind + sums = 0.0_dbl_kind + do iblock=1,numBlocks call ice_distributionGetBlockID(dist, iblock, blockID) @@ -182,55 +170,13 @@ function global_sum_dbl(array, dist, field_loc, mMask, lMask) & jb = this_block%jlo je = this_block%jhi -#ifdef REPRODUCIBLE - n = blockID -#else - n = 1 -#endif - - if (present(mMask)) then - do j=jb,je - do i=ib,ie - if (bfbflag) then - work(i,j,iblock) = array(i,j,iblock)*mMask(i,j,iblock) - else - blockSum(n) = & - blockSum(n) + array(i,j,iblock)*mMask(i,j,iblock) - endif - end do - end do - else if (present(lMask)) then - do j=jb,je - do i=ib,ie - if (lMask(i,j,iblock)) then - if (bfbflag) then - work(i,j,iblock) = array(i,j,iblock) - else - blockSum(n) = & - blockSum(n) + array(i,j,iblock) - endif - endif - end do - end do - else - do j=jb,je - do i=ib,ie - if (bfbflag) then - work(i,j,iblock) = array(i,j,iblock) - else - blockSum(n) = blockSum(n) + array(i,j,iblock) - endif - end do - end do - endif - !*** if this row along or beyond tripole boundary !*** must eliminate redundant points from global sum - if (.not.bfbflag) then + maxiglob = -1 if (this_block%tripole) then Nrow=(field_loc == field_loc_Nface .or. & - field_loc == field_loc_NEcorner) + field_loc == field_loc_NEcorner) if (Nrow .and. this_block%tripoleTFlag) then maxiglob = 0 ! entire u-row on T-fold grid elseif (Nrow .or. this_block%tripoleTFlag) then @@ -238,61 +184,37 @@ function global_sum_dbl(array, dist, field_loc, mMask, lMask) & else maxiglob = -1 ! nothing to do for T-row on u-fold endif + endif - if (maxiglob > 0) then - - j = je + n = (iblock-1)*nx_block*ny_block + do j=jb,je + do i=ib,ie + n = n + 1 + ! eliminate redundant points + if (maxiglob > 0 .and. j == je .and. this_block%i_glob(i) > maxiglob) then + work(n,1) = 0._dbl_kind + else if (present(mMask)) then - do i=ib,ie - if (this_block%i_glob(i) > maxiglob) then - blockSum(n) = & - blockSum(n) - array(i,j,iblock)*mMask(i,j,iblock) - endif - end do + work(n,1) = array(i,j,iblock)*mMask(i,j,iblock) else if (present(lMask)) then - do i=ib,ie - if (this_block%i_glob(i) > maxiglob) then - if (lMask(i,j,iblock)) & - blockSum(n) = blockSum(n) - array(i,j,iblock) - endif - end do + if (lMask(i,j,iblock)) then + work(n,1) = array(i,j,iblock) + endif else - do i=ib,ie - if (this_block%i_glob(i) > maxiglob) then - blockSum(n) = blockSum(n) - array(i,j,iblock) - endif - end do + work(n,1) = array(i,j,iblock) endif - - endif ! maxiglob - endif ! tripole - endif ! bfbflag + endif + end do + end do end do - if (bfbflag) then - call gather_global(workg, work, master_task, dist, spc_val=0.0_dbl_kind) - globalSum = 0.0_dbl_kind - if (my_task == master_task) then - do j = 1, ny_global - do i = 1, nx_global - globalSum = globalSum + workg(i,j) - enddo - enddo - endif - call MPI_BCAST(globalSum,1,mpiR8,master_task,communicator,ierr) - deallocate(workg,work) - else - if (my_task < numProcs) then - call MPI_ALLREDUCE(blockSum, globalSumTmp, nreduce, & - mpiR8, MPI_SUM, communicator, ierr) - endif + call compute_sums_dbl(work,sums,communicator,numProcs) - do n=1,nreduce - globalSum = globalSum + globalSumTmp(n) - enddo - deallocate(blockSum, globalSumTmp) - endif + globalSum = sums(1) + + deallocate(work) + deallocate(sums) !----------------------------------------------------------------------- @@ -306,8 +228,8 @@ function global_sum_real(array, dist, field_loc, mMask, lMask) & ! Computes the global sum of the physical domain of a 2-d array. ! ! This is actually the specific interface for the generic global_sum -! function corresponding to real arrays. The generic -! interface is identical but will handle real and integer 2-d slabs +! function corresponding to single precision arrays. The generic +! interface is identical but will handle double, real and integer 2-d slabs ! and real, integer, and double precision scalars. real (real_kind), dimension(:,:,:), intent(in) :: & @@ -334,21 +256,9 @@ function global_sum_real(array, dist, field_loc, mMask, lMask) & ! !----------------------------------------------------------------------- -#ifdef REPRODUCIBLE - real (dbl_kind) :: & - blockSum, &! sum of local block domain - localSum, &! sum of all local block domains - globalSumTmp ! higher precision global sum -#else - real (real_kind) :: & - blockSum, &! sum of local block domain - localSum ! sum of all local block domains -#endif - integer (int_kind) :: & - i,j,iblock, &! local counters + i,j,iblock,n, &! local counters ib,ie,jb,je, &! beg,end of physical domain - ierr, &! mpi error flag blockID, &! block location numProcs, &! number of processor participating numBlocks, &! number of local blocks @@ -358,6 +268,12 @@ function global_sum_real(array, dist, field_loc, mMask, lMask) & logical (log_kind) :: & Nrow ! this field is on a N row (a velocity row) + real (dbl_kind), dimension(:,:), allocatable :: & + work ! temporary local array + + real (dbl_kind), dimension(:), allocatable :: & + sums ! array of sums + type (block) :: & this_block ! holds local block information @@ -365,18 +281,21 @@ function global_sum_real(array, dist, field_loc, mMask, lMask) & !----------------------------------------------------------------------- -#ifdef REPRODUCIBLE - localSum = 0.0_dbl_kind -#else - localSum = 0.0_real_kind -#endif - globalSum = 0.0_real_kind call ice_distributionGet(dist, & numLocalBlocks = numBlocks, & nprocs = numProcs, & communicator = communicator) + if (numBlocks == 0) then + allocate(work(1,1)) + else + allocate(work(nx_block*ny_block*numBlocks,1)) + endif + allocate(sums(1)) + work = 0.0_dbl_kind + sums = 0.0_dbl_kind + do iblock=1,numBlocks call ice_distributionGetBlockID(dist, iblock, blockID) @@ -387,42 +306,13 @@ function global_sum_real(array, dist, field_loc, mMask, lMask) & jb = this_block%jlo je = this_block%jhi -#ifdef REPRODUCIBLE - blockSum = 0.0_dbl_kind -#else - blockSum = 0.0_real_kind -#endif - - if (present(mMask)) then - do j=jb,je - do i=ib,ie - blockSum = & - blockSum + array(i,j,iblock)*mMask(i,j,iblock) - end do - end do - else if (present(lMask)) then - do j=jb,je - do i=ib,ie - if (lMask(i,j,iblock)) then - blockSum = & - blockSum + array(i,j,iblock) - endif - end do - end do - else - do j=jb,je - do i=ib,ie - blockSum = blockSum + array(i,j,iblock) - end do - end do - endif - !*** if this row along or beyond tripole boundary !*** must eliminate redundant points from global sum + maxiglob = -1 if (this_block%tripole) then Nrow=(field_loc == field_loc_Nface .or. & - field_loc == field_loc_NEcorner) + field_loc == field_loc_NEcorner) if (Nrow .and. this_block%tripoleTFlag) then maxiglob = 0 ! entire u-row on T-fold grid elseif (Nrow .or. this_block%tripoleTFlag) then @@ -430,60 +320,37 @@ function global_sum_real(array, dist, field_loc, mMask, lMask) & else maxiglob = -1 ! nothing to do for T-row on u-fold endif + endif - if (maxiglob > 0) then - - j = je + n = (iblock-1)*nx_block*ny_block + do j=jb,je + do i=ib,ie + n = n + 1 + ! eliminate redundant points + if (maxiglob > 0 .and. j == je .and. this_block%i_glob(i) > maxiglob) then + work(n,1) = 0._dbl_kind + else if (present(mMask)) then - do i=ib,ie - if (this_block%i_glob(i) > maxiglob) then - blockSum = & - blockSum - array(i,j,iblock)*mMask(i,j,iblock) - endif - end do + work(n,1) = real(array(i,j,iblock)*mMask(i,j,iblock),dbl_kind) else if (present(lMask)) then - do i=ib,ie - if (this_block%i_glob(i) > maxiglob) then - if (lMask(i,j,iblock)) & - blockSum = blockSum - array(i,j,iblock) - endif - end do + if (lMask(i,j,iblock)) then + work(n,1) = real(array(i,j,iblock),dbl_kind) + endif else - do i=ib,ie - if (this_block%i_glob(i) > maxiglob) then - blockSum = blockSum - array(i,j,iblock) - endif - end do + work(n,1) = real(array(i,j,iblock),dbl_kind) endif - endif - endif - - !*** now add block sum to global sum - - localSum = localSum + blockSum - + end do + end do end do -!----------------------------------------------------------------------- -! -! now use MPI global reduction to reduce local sum to global sum -! -!----------------------------------------------------------------------- + call compute_sums_dbl(work,sums,communicator,numProcs) -#ifdef REPRODUCIBLE - if (my_task < numProcs) then - call MPI_ALLREDUCE(localSum, globalSumTmp, 1, & - mpiR8, MPI_SUM, communicator, ierr) - globalSum = globalSumTmp - endif -#else - if (my_task < numProcs) then - call MPI_ALLREDUCE(localSum, globalSum, 1, & - mpiR4, MPI_SUM, communicator, ierr) - endif -#endif + globalSum = real(sums(1),real_kind) + + deallocate(work) + deallocate(sums) !----------------------------------------------------------------------- @@ -567,38 +434,13 @@ function global_sum_int(array, dist, field_loc, mMask, lMask) & jb = this_block%jlo je = this_block%jhi - blockSum = 0 - - if (present(mMask)) then - do j=jb,je - do i=ib,ie - blockSum = & - blockSum + array(i,j,iblock)*mMask(i,j,iblock) - end do - end do - else if (present(lMask)) then - do j=jb,je - do i=ib,ie - if (lMask(i,j,iblock)) then - blockSum = & - blockSum + array(i,j,iblock) - endif - end do - end do - else - do j=jb,je - do i=ib,ie - blockSum = blockSum + array(i,j,iblock) - end do - end do - endif - !*** if this row along or beyond tripole boundary !*** must eliminate redundant points from global sum + maxiglob = -1 if (this_block%tripole) then Nrow=(field_loc == field_loc_Nface .or. & - field_loc == field_loc_NEcorner) + field_loc == field_loc_NEcorner) if (Nrow .and. this_block%tripoleTFlag) then maxiglob = 0 ! entire u-row on T-fold grid elseif (Nrow .or. this_block%tripoleTFlag) then @@ -606,35 +448,28 @@ function global_sum_int(array, dist, field_loc, mMask, lMask) & else maxiglob = -1 ! nothing to do for T-row on u-fold endif + endif - if (maxiglob > 0) then - - j = je + blockSum = 0_int_kind + do j=jb,je + do i=ib,ie + ! eliminate redundant points + if (maxiglob > 0 .and. j == je .and. this_block%i_glob(i) > maxiglob) then +! blockSum = blockSum + 0_int_kind + else if (present(mMask)) then - do i=ib,ie - if (this_block%i_glob(i) > maxiglob) then - blockSum = & - blockSum - array(i,j,iblock)*mMask(i,j,iblock) - endif - end do + blockSum = blockSum + array(i,j,iblock)*mMask(i,j,iblock) else if (present(lMask)) then - do i=ib,ie - if (this_block%i_glob(i) > maxiglob) then - if (lMask(i,j,iblock)) & - blockSum = blockSum - array(i,j,iblock) - endif - end do + if (lMask(i,j,iblock)) then + blockSum = blockSum + array(i,j,iblock) + endif else - do i=ib,ie - if (this_block%i_glob(i) > maxiglob) then - blockSum = blockSum - array(i,j,iblock) - endif - end do + blockSum = blockSum + array(i,j,iblock) endif - endif - endif + end do + end do !*** now add block sum to global sum @@ -648,10 +483,14 @@ function global_sum_int(array, dist, field_loc, mMask, lMask) & ! !----------------------------------------------------------------------- +#ifdef SERIAL_REMOVE_MPI + globalSum = localSum +#else if (my_task < numProcs) then call MPI_ALLREDUCE(localSum, globalSum, 1, & MPI_INTEGER, MPI_SUM, communicator, ierr) endif +#endif !----------------------------------------------------------------------- @@ -691,10 +530,11 @@ function global_sum_scalar_dbl(scalar, dist) & numBlocks, &! number of local blocks communicator ! communicator for this distribution -!#ifdef REPRODUCIBLE -! real (r16_kind) :: & -! scalarTmp, globalSumTmp ! higher precision for reproducibility -!#endif + real (dbl_kind), dimension(:,:), allocatable :: & + work ! temporary local array + + real (dbl_kind), dimension(:), allocatable :: & + sums ! array of sums character(len=*), parameter :: subname = '(global_sum_scalar_dbl)' @@ -709,27 +549,18 @@ function global_sum_scalar_dbl(scalar, dist) & nprocs = numProcs, & communicator = communicator) -!----------------------------------------------------------------------- -! -! now use MPI global reduction to reduce local sum to global sum -! REPRODUCIBLE option is commented out because MPI does not handle -! REAL16 correctly. -! -!----------------------------------------------------------------------- -!#ifdef REPRODUCIBLE -! if (my_task < numProcs) then -! scalarTmp = scalar -! call MPI_ALLREDUCE(scalarTmp, globalSumTmp, 1, & -! mpiR16, MPI_SUM, communicator, ierr) -! globalSum = globalSumTmp -! endif -!#else - if (my_task < numProcs) then - call MPI_ALLREDUCE(scalar, globalSum, 1, & - mpiR8, MPI_SUM, communicator, ierr) - endif -!#endif + allocate(work(1,1)) + allocate(sums(1)) + work(1,1) = scalar + sums = 0.0_dbl_kind + + call compute_sums_dbl(work,sums,communicator,numProcs) + + globalSum = sums(1) + + deallocate(work) + deallocate(sums) !----------------------------------------------------------------------- @@ -769,10 +600,11 @@ function global_sum_scalar_real(scalar, dist) & numBlocks, &! number of local blocks communicator ! communicator for this distribution -#ifdef REPRODUCIBLE - real (dbl_kind) :: & - scalarTmp, globalSumTmp ! higher precision for reproducibility -#endif + real (dbl_kind), dimension(:,:), allocatable :: & + work ! temporary local array + + real (dbl_kind), dimension(:), allocatable :: & + sums ! array of sums character(len=*), parameter :: subname = '(global_sum_scalar_real)' @@ -787,25 +619,17 @@ function global_sum_scalar_real(scalar, dist) & nprocs = numProcs, & communicator = communicator) -!----------------------------------------------------------------------- -! -! now use MPI global reduction to reduce local sum to global sum -! -!----------------------------------------------------------------------- + allocate(work(1,1)) + allocate(sums(1)) + work(1,1) = real(scalar,dbl_kind) + sums = 0.0_dbl_kind -#ifdef REPRODUCIBLE - if (my_task < numProcs) then - scalarTmp = scalar - call MPI_ALLREDUCE(scalarTmp, globalSumTmp, 1, & - mpiR8, MPI_SUM, communicator, ierr) - globalSum = globalSumTmp - endif -#else - if (my_task < numProcs) then - call MPI_ALLREDUCE(scalar, globalSum, 1, & - mpiR4, MPI_SUM, communicator, ierr) - endif -#endif + call compute_sums_dbl(work,sums,communicator,numProcs) + + globalSum = real(sums(1),real_kind) + + deallocate(work) + deallocate(sums) !----------------------------------------------------------------------- @@ -864,10 +688,14 @@ function global_sum_scalar_int(scalar, dist) & ! !----------------------------------------------------------------------- +#ifdef SERIAL_REMOVE_MPI + globalSum = scalar +#else if (my_task < numProcs) then call MPI_ALLREDUCE(scalar, globalSum, 1, & MPI_INTEGER, MPI_SUM, communicator, ierr) endif +#endif !----------------------------------------------------------------------- @@ -911,28 +739,23 @@ function global_sum_prod_dbl (array1, array2, dist, field_loc, & ! !----------------------------------------------------------------------- - real (dbl_kind), dimension(:), allocatable :: & - blockSum, &! sum of local block domain - globalSumTmp ! higher precision global sum - integer (int_kind) :: & - i,j,iblock,n, &! local counters - ib,ie,jb,je, &! beg,end of physical domain - ierr, &! mpi error flag - blockID, &! block location - numBlocks, &! number of local blocks - numProcs, &! number of processor participating - communicator, &! communicator for this distribution - nreduce, &! mpi count - maxiglob ! maximum non-redundant value of i_global + i,j,iblock,n, &! local counters + ib,ie,jb,je, &! beg,end of physical domain + blockID, &! block location + numProcs, &! number of processor participating + numBlocks, &! number of local blocks + communicator, &! communicator for this distribution + maxiglob ! maximum non-redundant value of i_global logical (log_kind) :: & Nrow ! this field is on a N row (a velocity row) real (dbl_kind), dimension(:,:), allocatable :: & - workg ! temporary global array - real (dbl_kind), dimension(:,:,:), allocatable :: & - work ! tempoerary local array + work ! temporary local array + + real (dbl_kind), dimension(:), allocatable :: & + sums ! array of sums type (block) :: & this_block ! holds local block information @@ -941,33 +764,21 @@ function global_sum_prod_dbl (array1, array2, dist, field_loc, & !----------------------------------------------------------------------- - if (bfbflag) then - allocate(work(nx_block,ny_block,max_blocks)) - work = 0.0_dbl_kind - if (my_task == master_task) then - allocate(workg(nx_global,ny_global)) - else - allocate(workg(1,1)) - endif - workg = 0.0_dbl_kind - else -#ifdef REPRODUCIBLE - nreduce = nblocks_tot -#else - nreduce = 1 -#endif - allocate(blockSum(nreduce), & - globalSumTmp(nreduce)) - blockSum = 0.0_dbl_kind - globalSumTmp = 0.0_dbl_kind - globalSum = 0.0_dbl_kind - endif call ice_distributionGet(dist, & numLocalBlocks = numBlocks, & nprocs = numProcs, & communicator = communicator) + if (numBlocks == 0) then + allocate(work(1,1)) + else + allocate(work(nx_block*ny_block*numBlocks,1)) + endif + allocate(sums(1)) + work = 0.0_dbl_kind + sums = 0.0_dbl_kind + do iblock=1,numBlocks call ice_distributionGetBlockID(dist, iblock, blockID) @@ -978,57 +789,13 @@ function global_sum_prod_dbl (array1, array2, dist, field_loc, & jb = this_block%jlo je = this_block%jhi -#ifdef REPRODUCIBLE - n = blockID -#else - n = 1 -#endif - - if (present(mMask)) then - do j=jb,je - do i=ib,ie - if (bfbflag) then - work(i,j,iblock) = array1(i,j,iblock)*array2(i,j,iblock)* & - mMask(i,j,iblock) - else - blockSum(n) = & - blockSum(n) + array1(i,j,iblock)*array2(i,j,iblock)* & - mMask(i,j,iblock) - endif - end do - end do - else if (present(lMask)) then - do j=jb,je - do i=ib,ie - if (lMask(i,j,iblock)) then - if (bfbflag) then - work(i,j,iblock) = array1(i,j,iblock)*array2(i,j,iblock) - else - blockSum(n) = & - blockSum(n) + array1(i,j,iblock)*array2(i,j,iblock) - endif - endif - end do - end do - else - do j=jb,je - do i=ib,ie - if (bfbflag) then - work(i,j,iblock) = array1(i,j,iblock)*array2(i,j,iblock) - else - blockSum(n) = blockSum(n) + array1(i,j,iblock)*array2(i,j,iblock) - endif - end do - end do - endif - !*** if this row along or beyond tripole boundary !*** must eliminate redundant points from global sum - if (.not.bfbflag) then + maxiglob = -1 if (this_block%tripole) then Nrow=(field_loc == field_loc_Nface .or. & - field_loc == field_loc_NEcorner) + field_loc == field_loc_NEcorner) if (Nrow .and. this_block%tripoleTFlag) then maxiglob = 0 ! entire u-row on T-fold grid elseif (Nrow .or. this_block%tripoleTFlag) then @@ -1036,64 +803,37 @@ function global_sum_prod_dbl (array1, array2, dist, field_loc, & else maxiglob = -1 ! nothing to do for T-row on u-fold endif + endif - if (maxiglob > 0) then - - j = je + n = (iblock-1)*nx_block*ny_block + do j=jb,je + do i=ib,ie + n = n + 1 + ! eliminate redundant points + if (maxiglob > 0 .and. j == je .and. this_block%i_glob(i) > maxiglob) then + work(n,1) = 0._dbl_kind + else if (present(mMask)) then - do i=ib,ie - if (this_block%i_glob(i) > maxiglob) then - blockSum(n) = & - blockSum(n) - array1(i,j,iblock)*array2(i,j,iblock)* & - mMask(i,j,iblock) - endif - end do + work(n,1) = array1(i,j,iblock)*array2(i,j,iblock)*mMask(i,j,iblock) else if (present(lMask)) then - do i=ib,ie - if (this_block%i_glob(i) > maxiglob) then - if (lMask(i,j,iblock)) & - blockSum(n) = blockSum(n) - & - array1(i,j,iblock)*array2(i,j,iblock) - endif - end do + if (lMask(i,j,iblock)) then + work(n,1) = array1(i,j,iblock)*array2(i,j,iblock) + endif else - do i=ib,ie - if (this_block%i_glob(i) > maxiglob) then - blockSum(n) = blockSum(n) - & - array1(i,j,iblock)*array2(i,j,iblock) - endif - end do + work(n,1) = array1(i,j,iblock)*array2(i,j,iblock) endif - - endif ! maxiglob - endif ! tripole - endif ! bfbflag + endif + end do + end do end do - if (bfbflag) then - call gather_global(workg, work, master_task, dist, spc_val=0.0_dbl_kind) - globalSum = 0.0_dbl_kind - if (my_task == master_task) then - do j = 1, ny_global - do i = 1, nx_global - globalSum = globalSum + workg(i,j) - enddo - enddo - endif - call MPI_BCAST(globalSum,1,mpiR8,master_task,communicator,ierr) - deallocate(workg,work) - else - if (my_task < numProcs) then - call MPI_ALLREDUCE(blockSum, globalSumTmp, nreduce, & - mpiR8, MPI_SUM, communicator, ierr) - endif + call compute_sums_dbl(work,sums,communicator,numProcs) - do n=1,nreduce - globalSum = globalSum + globalSumTmp(n) - enddo - deallocate(blockSum, globalSumTmp) - endif + globalSum = sums(1) + + deallocate(work) + deallocate(sums) !----------------------------------------------------------------------- @@ -1137,49 +877,46 @@ function global_sum_prod_real (array1, array2, dist, field_loc, & ! !----------------------------------------------------------------------- -#ifdef REPRODUCIBLE - real (dbl_kind) :: & - blockSum, &! sum of local block domain - localSum, &! sum of all local block domains - globalSumTmp ! higher precision for reproducibility -#else - real (real_kind) :: & - blockSum, &! sum of local block domain - localSum ! sum of all local block domains -#endif - integer (int_kind) :: & - i,j,iblock, &! local counters - ib,ie,jb,je, &! beg,end of physical domain - ierr, &! mpi error flag - blockID, &! block location - numBlocks, &! number of local blocks - numProcs, &! number of processor participating - communicator, &! communicator for this distribution - maxiglob ! maximum non-redundant value of i_global + i,j,iblock,n, &! local counters + ib,ie,jb,je, &! beg,end of physical domain + blockID, &! block location + numProcs, &! number of processor participating + numBlocks, &! number of local blocks + communicator, &! communicator for this distribution + maxiglob ! maximum non-redundant value of i_global logical (log_kind) :: & Nrow ! this field is on a N row (a velocity row) + real (dbl_kind), dimension(:,:), allocatable :: & + work ! temporary local array + + real (dbl_kind), dimension(:), allocatable :: & + sums ! array of sums + type (block) :: & - this_block ! holds local block information + this_block ! holds local block information - character(len=*), parameter :: subname = '(global_sum_prod_real)' + character(len=*), parameter :: subname = '(global_sum_prod_dbl)' !----------------------------------------------------------------------- -#ifdef REPRODUCIBLE - localSum = 0.0_dbl_kind -#else - localSum = 0.0_real_kind -#endif - globalSum = 0.0_real_kind - call ice_distributionGet(dist, & + call ice_distributionGet(dist, & numLocalBlocks = numBlocks, & - nprocs = numProcs, & + nprocs = numProcs, & communicator = communicator) + if (numBlocks == 0) then + allocate(work(1,1)) + else + allocate(work(nx_block*ny_block*numBlocks,1)) + endif + allocate(sums(1)) + work = 0.0_dbl_kind + sums = 0.0_dbl_kind + do iblock=1,numBlocks call ice_distributionGetBlockID(dist, iblock, blockID) @@ -1190,43 +927,13 @@ function global_sum_prod_real (array1, array2, dist, field_loc, & jb = this_block%jlo je = this_block%jhi -#ifdef REPRODUCIBLE - blockSum = 0.0_dbl_kind -#else - blockSum = 0.0_real_kind -#endif - - if (present(mMask)) then - do j=jb,je - do i=ib,ie - blockSum = & - blockSum + array1(i,j,iblock)*array2(i,j,iblock)* & - mMask(i,j,iblock) - end do - end do - else if (present(lMask)) then - do j=jb,je - do i=ib,ie - if (lMask(i,j,iblock)) then - blockSum = & - blockSum + array1(i,j,iblock)*array2(i,j,iblock) - endif - end do - end do - else - do j=jb,je - do i=ib,ie - blockSum = blockSum + array1(i,j,iblock)*array2(i,j,iblock) - end do - end do - endif - !*** if this row along or beyond tripole boundary !*** must eliminate redundant points from global sum + maxiglob = -1 if (this_block%tripole) then Nrow=(field_loc == field_loc_Nface .or. & - field_loc == field_loc_NEcorner) + field_loc == field_loc_NEcorner) if (Nrow .and. this_block%tripoleTFlag) then maxiglob = 0 ! entire u-row on T-fold grid elseif (Nrow .or. this_block%tripoleTFlag) then @@ -1234,63 +941,37 @@ function global_sum_prod_real (array1, array2, dist, field_loc, & else maxiglob = -1 ! nothing to do for T-row on u-fold endif + endif - if (maxiglob > 0) then - - j = je + n = (iblock-1)*nx_block*ny_block + do j=jb,je + do i=ib,ie + n = n + 1 + ! eliminate redundant points + if (maxiglob > 0 .and. j == je .and. this_block%i_glob(i) > maxiglob) then + work(n,1) = 0._dbl_kind + else if (present(mMask)) then - do i=ib,ie - if (this_block%i_glob(i) > maxiglob) then - blockSum = & - blockSum - array1(i,j,iblock)*array2(i,j,iblock)* & - mMask(i,j,iblock) - endif - end do + work(n,1) = real(array1(i,j,iblock)*array2(i,j,iblock)*mMask(i,j,iblock),dbl_kind) else if (present(lMask)) then - do i=ib,ie - if (this_block%i_glob(i) > maxiglob) then - if (lMask(i,j,iblock)) & - blockSum = blockSum - & - array1(i,j,iblock)*array2(i,j,iblock) - endif - end do + if (lMask(i,j,iblock)) then + work(n,1) = real(array1(i,j,iblock)*array2(i,j,iblock),dbl_kind) + endif else - do i=ib,ie - if (this_block%i_glob(i) > maxiglob) then - blockSum = blockSum - & - array1(i,j,iblock)*array2(i,j,iblock) - endif - end do + work(n,1) = real(array1(i,j,iblock)*array2(i,j,iblock),dbl_kind) endif - endif - endif - - !*** now add block sum to global sum - - localSum = localSum + blockSum - + end do + end do end do -!----------------------------------------------------------------------- -! -! now use MPI global reduction to reduce local sum to global sum -! -!----------------------------------------------------------------------- + call compute_sums_dbl(work,sums,communicator,numProcs) -#ifdef REPRODUCIBLE - if (my_task < numProcs) then - call MPI_ALLREDUCE(localSum, globalSumTmp, 1, & - mpiR8, MPI_SUM, communicator, ierr) - globalSum = globalSumTmp - endif -#else - if (my_task < numProcs) then - call MPI_ALLREDUCE(localSum, globalSum, 1, & - mpiR4, MPI_SUM, communicator, ierr) - endif -#endif + globalSum = real(sums(1),real_kind) + + deallocate(work) + deallocate(sums) !----------------------------------------------------------------------- @@ -1376,39 +1057,13 @@ function global_sum_prod_int (array1, array2, dist, field_loc, & jb = this_block%jlo je = this_block%jhi - blockSum = 0 - - if (present(mMask)) then - do j=jb,je - do i=ib,ie - blockSum = & - blockSum + array1(i,j,iblock)*array2(i,j,iblock)* & - mMask(i,j,iblock) - end do - end do - else if (present(lMask)) then - do j=jb,je - do i=ib,ie - if (lMask(i,j,iblock)) then - blockSum = & - blockSum + array1(i,j,iblock)*array2(i,j,iblock) - endif - end do - end do - else - do j=jb,je - do i=ib,ie - blockSum = blockSum + array1(i,j,iblock)*array2(i,j,iblock) - end do - end do - endif - !*** if this row along or beyond tripole boundary !*** must eliminate redundant points from global sum + maxiglob = -1 if (this_block%tripole) then Nrow=(field_loc == field_loc_Nface .or. & - field_loc == field_loc_NEcorner) + field_loc == field_loc_NEcorner) if (Nrow .and. this_block%tripoleTFlag) then maxiglob = 0 ! entire u-row on T-fold grid elseif (Nrow .or. this_block%tripoleTFlag) then @@ -1416,38 +1071,28 @@ function global_sum_prod_int (array1, array2, dist, field_loc, & else maxiglob = -1 ! nothing to do for T-row on u-fold endif + endif - if (maxiglob > 0) then - - j = je + blockSum = 0_int_kind + do j=jb,je + do i=ib,ie + ! eliminate redundant points + if (maxiglob > 0 .and. j == je .and. this_block%i_glob(i) > maxiglob) then +! blockSum = blockSum + 0_int_kind + else if (present(mMask)) then - do i=ib,ie - if (this_block%i_glob(i) > maxiglob) then - blockSum = & - blockSum - array1(i,j,iblock)*array2(i,j,iblock)* & - mMask(i,j,iblock) - endif - end do + blockSum = blockSum + array1(i,j,iblock)*array2(i,j,iblock)*mMask(i,j,iblock) else if (present(lMask)) then - do i=ib,ie - if (this_block%i_glob(i) > maxiglob) then - if (lMask(i,j,iblock)) & - blockSum = blockSum - & - array1(i,j,iblock)*array2(i,j,iblock) - endif - end do + if (lMask(i,j,iblock)) then + blockSum = blockSum + array1(i,j,iblock)*array2(i,j,iblock) + endif else - do i=ib,ie - if (this_block%i_glob(i) > maxiglob) then - blockSum = blockSum - & - array1(i,j,iblock)*array2(i,j,iblock) - endif - end do + blockSum = blockSum + array1(i,j,iblock)*array2(i,j,iblock) endif - endif - endif + end do + end do !*** now add block sum to global sum @@ -1461,10 +1106,14 @@ function global_sum_prod_int (array1, array2, dist, field_loc, & ! !----------------------------------------------------------------------- +#ifdef SERIAL_REMOVE_MPI + globalSum = localSum +#else if (my_task < numProcs) then call MPI_ALLREDUCE(localSum, globalSum, 1, & MPI_INTEGER, MPI_SUM, communicator, ierr) endif +#endif !----------------------------------------------------------------------- @@ -1564,10 +1213,14 @@ function global_maxval_dbl (array, dist, lMask) & ! !----------------------------------------------------------------------- +#ifdef SERIAL_REMOVE_MPI + globalMaxval = localMaxval +#else if (my_task < numProcs) then call MPI_ALLREDUCE(localMaxval, globalMaxval, 1, & mpiR8, MPI_MAX, communicator, ierr) endif +#endif !----------------------------------------------------------------------- @@ -1667,10 +1320,14 @@ function global_maxval_real (array, dist, lMask) & ! !----------------------------------------------------------------------- +#ifdef SERIAL_REMOVE_MPI + globalMaxval = localMaxval +#else if (my_task < numProcs) then call MPI_ALLREDUCE(localMaxval, globalMaxval, 1, & mpiR4, MPI_MAX, communicator, ierr) endif +#endif !----------------------------------------------------------------------- @@ -1770,10 +1427,14 @@ function global_maxval_int (array, dist, lMask) & ! !----------------------------------------------------------------------- +#ifdef SERIAL_REMOVE_MPI + globalMaxval = localMaxval +#else if (my_task < numProcs) then call MPI_ALLREDUCE(localMaxval, globalMaxval, 1, & MPI_INTEGER, MPI_MAX, communicator, ierr) endif +#endif !----------------------------------------------------------------------- @@ -1824,10 +1485,14 @@ function global_maxval_scalar_dbl (scalar, dist) & ! !----------------------------------------------------------------------- +#ifdef SERIAL_REMOVE_MPI + globalMaxval = scalar +#else if (my_task < numProcs) then call MPI_ALLREDUCE(scalar, globalMaxval, 1, & mpiR8, MPI_MAX, communicator, ierr) endif +#endif !----------------------------------------------------------------------- @@ -1878,10 +1543,14 @@ function global_maxval_scalar_real (scalar, dist) & ! !----------------------------------------------------------------------- +#ifdef SERIAL_REMOVE_MPI + globalMaxval = scalar +#else if (my_task < numProcs) then call MPI_ALLREDUCE(scalar, globalMaxval, 1, & mpiR4, MPI_MAX, communicator, ierr) endif +#endif !----------------------------------------------------------------------- @@ -1932,10 +1601,14 @@ function global_maxval_scalar_int (scalar, dist) & ! !----------------------------------------------------------------------- +#ifdef SERIAL_REMOVE_MPI + globalMaxval = scalar +#else if (my_task < numProcs) then call MPI_ALLREDUCE(scalar, globalMaxval, 1, & MPI_INTEGER, MPI_MAX, communicator, ierr) endif +#endif !----------------------------------------------------------------------- @@ -2035,10 +1708,14 @@ function global_minval_dbl (array, dist, lMask) & ! !----------------------------------------------------------------------- +#ifdef SERIAL_REMOVE_MPI + globalMinval = localMinval +#else if (my_task < numProcs) then call MPI_ALLREDUCE(localMinval, globalMinval, 1, & mpiR8, MPI_MIN, communicator, ierr) endif +#endif !----------------------------------------------------------------------- @@ -2138,10 +1815,14 @@ function global_minval_real (array, dist, lMask) & ! !----------------------------------------------------------------------- +#ifdef SERIAL_REMOVE_MPI + globalMinval = localMinval +#else if (my_task < numProcs) then call MPI_ALLREDUCE(localMinval, globalMinval, 1, & mpiR4, MPI_MIN, communicator, ierr) endif +#endif !----------------------------------------------------------------------- @@ -2241,10 +1922,14 @@ function global_minval_int (array, dist, lMask) & ! !----------------------------------------------------------------------- +#ifdef SERIAL_REMOVE_MPI + globalMinval = localMinval +#else if (my_task < numProcs) then call MPI_ALLREDUCE(localMinval, globalMinval, 1, & MPI_INTEGER, MPI_MIN, communicator, ierr) endif +#endif !----------------------------------------------------------------------- @@ -2295,10 +1980,14 @@ function global_minval_scalar_dbl (scalar, dist) & ! !----------------------------------------------------------------------- +#ifdef SERIAL_REMOVE_MPI + globalMinval = scalar +#else if (my_task < numProcs) then call MPI_ALLREDUCE(scalar, globalMinval, 1, & mpiR8, MPI_MIN, communicator, ierr) endif +#endif !----------------------------------------------------------------------- @@ -2349,10 +2038,14 @@ function global_minval_scalar_real (scalar, dist) & ! !----------------------------------------------------------------------- +#ifdef SERIAL_REMOVE_MPI + globalMinval = scalar +#else if (my_task < numProcs) then call MPI_ALLREDUCE(scalar, globalMinval, 1, & mpiR4, MPI_MIN, communicator, ierr) endif +#endif !----------------------------------------------------------------------- @@ -2403,17 +2096,168 @@ function global_minval_scalar_int (scalar, dist) & ! !----------------------------------------------------------------------- +#ifdef SERIAL_REMOVE_MPI + globalMinval = scalar +#else if (my_task < numProcs) then call MPI_ALLREDUCE(scalar, globalMinval, 1, & MPI_INTEGER, MPI_MIN, communicator, ierr) endif +#endif !----------------------------------------------------------------------- end function global_minval_scalar_int +!*********************************************************************** +!*********************************************************************** + +subroutine compute_sums_dbl(array2,sums8,mpicomm,numprocs) + +! Computes the global sum of a 2-d array over fields +! with first dimension values and second dimension fields +! +! Several different options are supported. +! lsum4 = local sum with real*4 and scalar mpi allreduce, unlikely to be bfb +! lsum8 = local sum with real*8 and scalar mpi allreduce, unlikely to be bfb +! lsum16 = local sum with real*16 and scalar mpi allreduce, likely to be bfb +! WARNING: this does not work in several compilers and mpi +! implementations due to support for quad precision and consistency +! between underlying datatype in fortran and c. The source code +! can be turned off with a cpp NO_R16. Otherwise, it is recommended +! that the results be validated on any platform where it might be used. +! reprosum = fixed point method based on ordered double integer sums. +! that requires two scalar reductions per global sum. +! This is extremely likely to be bfb. +! (See Mirin and Worley, 2012, IJHPCA, 26, 1730, +! https://journals.sagepub.com/doi/10.1177/1094342011412630) +! ddpdd = parallel double-double algorithm using single scalar reduction. +! This is very likely to be bfb. +! (See He and Ding, 2001, Journal of Supercomputing, 18, 259, +! https://link.springer.com/article/10.1023%2FA%3A1008153532043) + + real (dbl_kind), dimension(:,:), intent(in) :: & + array2 ! array to be summed + + real (dbl_kind), dimension(:), intent(inout) :: & + sums8 ! resulting global sum + + integer(int_kind), intent(in) :: & + mpicomm + + integer(int_kind), intent(in) :: & + numprocs + +!----------------------------------------------------------------------- +! +! local variables +! +!----------------------------------------------------------------------- + + real (real_kind), allocatable :: psums4(:) + real (real_kind), allocatable :: sums4(:) + real (dbl_kind) , allocatable :: psums8(:) +#ifndef NO_R16 + real (r16_kind) , allocatable :: psums16(:) + real (r16_kind) , allocatable :: sums16(:) +#endif + + integer (int_kind) :: ns,nf,i,j, ierr + + character(len=*), parameter :: subname = '(compute_sums_dbl)' + +!----------------------------------------------------------------------- + + sums8 = 0._dbl_kind + ns = size(array2,dim=1) + nf = size(array2,dim=2) + + if (bfbflag == 'off' .or. bfbflag == 'lsum8') then + allocate(psums8(nf)) + psums8(:) = 0._dbl_kind + + do j = 1, nf + do i = 1, ns + psums8(j) = psums8(j) + array2(i,j) + enddo + enddo + +#ifdef SERIAL_REMOVE_MPI + sums8 = psums8 +#else + if (my_task < numProcs) then + call MPI_ALLREDUCE(psums8, sums8, nf, mpiR8, MPI_SUM, mpicomm, ierr) + endif +#endif + + deallocate(psums8) + +#ifndef NO_R16 + elseif (bfbflag == 'lsum16') then + allocate(psums16(nf)) + psums16(:) = 0._r16_kind + allocate(sums16(nf)) + sums16(:) = 0._r16_kind + + do j = 1, nf + do i = 1, ns + psums16(j) = psums16(j) + real(array2(i,j),r16_kind) + enddo + enddo + +#ifdef SERIAL_REMOVE_MPI + sums16 = psums16 +#else + if (my_task < numProcs) then + call MPI_ALLREDUCE(psums16, sums16, nf, mpiR16, MPI_SUM, mpicomm, ierr) + endif +#endif + sums8 = real(sums16,dbl_kind) + + deallocate(psums16,sums16) +#endif + + elseif (bfbflag == 'lsum4') then + allocate(psums4(nf)) + psums4(:) = 0._real_kind + allocate(sums4(nf)) + sums4(:) = 0._real_kind + + do j = 1, nf + do i = 1, ns + psums4(j) = psums4(j) + real(array2(i,j),real_kind) + enddo + enddo + +#ifdef SERIAL_REMOVE_MPI + sums4 = psums4 +#else + if (my_task < numProcs) then + call MPI_ALLREDUCE(psums4, sums4, nf, mpiR4, MPI_SUM, mpicomm, ierr) + endif +#endif + sums8 = real(sums4,dbl_kind) + + deallocate(psums4,sums4) + + elseif (bfbflag == 'ddpdd') then + if (my_task < numProcs) then + call ice_reprosum_calc(array2,sums8,ns,ns,nf,ddpdd_sum=.true.,commid=mpicomm) + endif + + elseif (bfbflag == 'reprosum') then + if (my_task < numProcs) then + call ice_reprosum_calc(array2,sums8,ns,ns,nf,ddpdd_sum=.false.,commid=mpicomm) + endif + + else + call abort_ice(subname//'ERROR: bfbflag unknown '//trim(bfbflag)) + endif + +end subroutine compute_sums_dbl + !*********************************************************************** - end module ice_global_reductions +end module ice_global_reductions !||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| diff --git a/cicecore/cicedynB/infrastructure/comm/mpi/ice_reprosum.F90 b/cicecore/cicedynB/infrastructure/comm/mpi/ice_reprosum.F90 new file mode 100644 index 000000000..ab1c5969a --- /dev/null +++ b/cicecore/cicedynB/infrastructure/comm/mpi/ice_reprosum.F90 @@ -0,0 +1,1481 @@ + +!> Reproducible sum method from P. Worley + +MODULE ice_reprosum + +!----------------------------------------------------------------------- +! +! Purpose: +!> Compute reproducible global sums of a set of arrays across an MPI +!> subcommunicator +! +! Methods: +!> Compute using either or both a scalable, reproducible algorithm and a +!> scalable, nonreproducible algorithm: +!> * Reproducible (scalable): +!> Convert to fixed point (integer vector representation) to enable +!> reproducibility when using MPI_Allreduce +!> * Alternative usually reproducible (scalable): +!> Use parallel double-double algorithm due to Helen He and +!> Chris Ding, based on David Bailey's/Don Knuth's DDPDD algorithm +!> * Nonreproducible (scalable): +!> Floating point and MPI_Allreduce based. +!> If computing both reproducible and nonreproducible sums, compare +!> these and report relative difference (if absolute difference +!> less than sum) or absolute difference back to calling routine. +! +! Author: P. Worley (based on suggestions from J. White for fixed +! point algorithm and on He/Ding paper for ddpdd +! algorithm) +! +! Modified by T.Craig for CICE, March 2019 based on the public version in +! Oasis3-MCT_4.0. +! +!----------------------------------------------------------------------- + +!----------------------------------------------------------------------- +!- use statements ------------------------------------------------------ +!----------------------------------------------------------------------- +#if ( defined noI8 ) + ! Workaround for when shr_kind_i8 is not supported. + use ice_kinds_mod, only: r8 => dbl_kind, i8 => int_kind +#else + use ice_kinds_mod, only: r8 => dbl_kind, i8 => int8_kind +#endif + use ice_kinds_mod, only: char_len_long + use ice_fileunits, only: nu_diag + use ice_exit, only: abort_ice + +! internal timers not yet implemented, need to revisit if needed +! use ice_mpi, only: xicex_mpi_barrier +! use ice_timer, only: xicex_timer_start, xicex_timer_stop + +!----------------------------------------------------------------------- +!- module boilerplate -------------------------------------------------- +!----------------------------------------------------------------------- + implicit none + private + +!----------------------------------------------------------------------- +!- include statements -------------------------------------------------- +!----------------------------------------------------------------------- +#ifndef SERIAL_REMOVE_MPI +#include +#endif + +!----------------------------------------------------------------------- +! Public interfaces ---------------------------------------------------- +!----------------------------------------------------------------------- + public :: & + ice_reprosum_setopts, &! set runtime options + ice_reprosum_calc, &! calculate distributed sum + ice_reprosum_tolExceeded ! utility function to check relative + ! differences against the tolerance + +!----------------------------------------------------------------------- +! Public data ---------------------------------------------------------- +!----------------------------------------------------------------------- + logical, public :: ice_reprosum_recompute = .false. + + real(r8), public :: ice_reprosum_reldiffmax = -1.0_r8 + +!----------------------------------------------------------------------- +! Private interfaces --------------------------------------------------- +!----------------------------------------------------------------------- + private :: & + ddpdd, &! double-double sum routine + split_indices ! split indices among OMP threads + +!----------------------------------------------------------------------- +! Private data --------------------------------------------------------- +!----------------------------------------------------------------------- + + logical :: repro_sum_use_ddpdd = .false. + logical :: detailed_timing = .false. + character(len=char_len_long) :: tmpstr + + CONTAINS + +!======================================================================== +!----------------------------------------------------------------------- +! Purpose: +!> Set runtime options +! Author: P. Worley +!----------------------------------------------------------------------- + + subroutine ice_reprosum_setopts(repro_sum_use_ddpdd_in, & + repro_sum_rel_diff_max_in, & + repro_sum_recompute_in, & + repro_sum_master, & + repro_sum_logunit ) + +!------------------------------Arguments-------------------------------- + logical, intent(in), optional :: repro_sum_use_ddpdd_in + !< Use DDPDD algorithm instead of fixed precision algorithm + real(r8), intent(in), optional :: repro_sum_rel_diff_max_in + !< maximum permissible difference between reproducible and + !< nonreproducible sums + logical, intent(in), optional :: repro_sum_recompute_in + !< recompute using different algorithm when difference between + !< reproducible and nonreproducible sums is too great + logical, intent(in), optional :: repro_sum_master + !< flag indicating whether this process should output + !< log messages + integer, intent(in), optional :: repro_sum_logunit + !< unit number for log messages +!---------------------------Local Workspace----------------------------- + integer llogunit ! unit number for log messages + logical master ! local master? + logical,save :: firstcall = .true. ! first call + character(len=*),parameter :: subname = '(ice_reprosum_setopts)' +!----------------------------------------------------------------------- + + if ( present(repro_sum_master) ) then + master = repro_sum_master + else + master = .false. + endif + + if ( present(repro_sum_logunit) ) then + llogunit = repro_sum_logunit + else + llogunit = nu_diag + endif + + if (.not. firstcall) then + write(tmpstr,*) subname//' ERROR: can only be called once' + call abort_ice(tmpstr,file=__FILE__,line=__LINE__) + endif + firstcall = .false. + + if ( present(repro_sum_use_ddpdd_in) ) then + repro_sum_use_ddpdd = repro_sum_use_ddpdd_in + endif + if ( present(repro_sum_rel_diff_max_in) ) then + ice_reprosum_reldiffmax = repro_sum_rel_diff_max_in + endif + if ( present(repro_sum_recompute_in) ) then + ice_reprosum_recompute = repro_sum_recompute_in + endif + if (master) then + if ( repro_sum_use_ddpdd ) then + write(llogunit,*) subname, & + 'Using double-double-based (scalable) usually reproducible ', & + 'distributed sum algorithm' + else + write(llogunit,*) subname, & + 'Using fixed-point-based (scalable) reproducible ', & + 'distributed sum algorithm' + endif + + if (ice_reprosum_reldiffmax >= 0._r8) then + write(llogunit,*) subname, & + ' with a maximum relative error tolerance of ', & + ice_reprosum_reldiffmax + if (ice_reprosum_recompute) then + write(llogunit,*) subname, & + 'If tolerance exceeded, sum is recomputed using ', & + 'a serial algorithm.' + else + write(llogunit,*) subname, & + 'If tolerance exceeded, fixed-precision is sum used ', & + 'but a warning is output.' + endif + else + write(llogunit,*) subname, & + 'and not comparing with floating point algorithms.' + endif + + endif + end subroutine ice_reprosum_setopts + +!======================================================================== + +! +! Purpose: +!> Compute the global sum of each field in "arr" using the indicated +!> communicator with a reproducible yet scalable implementation based +!> on a fixed point algorithm. An alternative is to use an "almost +!> always reproducible" floating point algorithm. +! +! The accuracy of the fixed point algorithm is controlled by the +! number of "levels" of integer expansion. The algorithm will calculate +! the number of levels that is required for the sum to be essentially +! exact. The optional parameter arr_max_levels can be used to override +! the calculated value. The optional parameter arr_max_levels_out can be +! used to return the values used. +! +! The algorithm also requires an upper bound on +! the maximum summand (in absolute value) for each field, and will +! calculate this internally. However, if the optional parameters +! arr_max_levels and arr_gbl_max are both set, then the algorithm will +! use the values in arr_gbl_max for the upper bounds instead. If these +! are not upper bounds, or if the upper bounds are not tight enough +! to achieve the requisite accuracy, and if the optional parameter +! repro_sum_validate is NOT set to .false., the algorithm will repeat the +! computation with appropriate upper bounds. If only arr_gbl_max is present, +! then the maxima are computed internally (and the specified values are +! ignored). The optional parameter arr_gbl_max_out can be +! used to return the values used. +! +! Finally, the algorithm requires an upper bound on the number of +! local summands across all processes. This will be calculated internally, +! using an MPI collective, but the value in the optional argument +! gbl_max_nsummands will be used instead if (1) it is present, (2) +! it is > 0, and (3) the maximum value and required number of levels +! are also specified. (If the maximum value is calculated, the same +! MPI collective is used to determine the maximum number of local +! summands.) The accuracy of the user-specified value is not checked. +! However, if set to < 1, the value will instead be calculated. If the +! optional parameter gbl_max_nsummands_out is present, then the value +! used (gbl_max_nsummands if >= 1; calculated otherwise) will be +! returned. +! +! If requested (by setting ice_reprosum_reldiffmax >= 0.0 and passing in +! the optional rel_diff parameter), results are compared with a +! nonreproducible floating point algorithm. +! +! Note that the cost of the algorithm is not strongly correlated with +! the number of levels, which primarily shows up as a (modest) increase +! in cost of the MPI_Allreduce as a function of vector length. Rather the +! cost is more a function of (a) the number of integers required to +! represent an individual summand and (b) the number of MPI_Allreduce +! calls. The number of integers required to represent an individual +! summand is 1 or 2 when using 8-byte integers for 8-byte real summands +! when the number of local summands is not too large. As the number of +! local summands increases, the number of integers required increases. +! The number of MPI_Allreduce calls is either 2 (specifying nothing) or +! 1 (specifying gbl_max_nsummands, arr_max_levels, and arr_gbl_max +! correctly). When specifying arr_max_levels and arr_gbl_max +! incorrectly, 3 or 4 MPI_Allreduce calls will be required. +! +! The alternative algorithm is a minor modification of a parallel +! implementation of David Bailey's routine DDPDD by Helen He +! and Chris Ding. Bailey uses the Knuth trick to implement quadruple +! precision summation of double precision values with 10 double +! precision operations. The advantage of this algorithm is that +! it requires a single MPI_Allreduce and is less expensive per summand +! than is the fixed precision algorithm. The disadvantage is that it +! is not guaranteed to be reproducible (though it is reproducible +! much more often than is the standard algorithm). This alternative +! is used when the optional parameter ddpdd_sum is set to .true. It is +! also used if the fixed precision algorithm radix assumption does not +! hold. + +!---------------------------------------------------------------------- + subroutine ice_reprosum_calc (arr, arr_gsum, nsummands, dsummands, & + nflds, ddpdd_sum, & + arr_gbl_max, arr_gbl_max_out, & + arr_max_levels, arr_max_levels_out, & + gbl_max_nsummands, gbl_max_nsummands_out,& + gbl_count, repro_sum_validate, & + repro_sum_stats, rel_diff, commid ) +!---------------------------------------------------------------------- + +! Arguments + + integer, intent(in) :: nsummands !< number of local summands + integer, intent(in) :: dsummands !< declared first dimension + integer, intent(in) :: nflds !< number of fields + real(r8), intent(in) :: arr(dsummands,nflds) + !< input array + + real(r8), intent(out):: arr_gsum(nflds) + !< global means + + logical, intent(in), optional :: ddpdd_sum + !< use ddpdd algorithm instead + !< of fixed precision algorithm + + real(r8), intent(in), optional :: arr_gbl_max(nflds) + !< upper bound on max(abs(arr)) + + real(r8), intent(out), optional :: arr_gbl_max_out(nflds) + !< calculated upper bound on + !< max(abs(arr)) + + integer, intent(in), optional :: arr_max_levels(nflds) + !< maximum number of levels of + !< integer expansion to use + + integer, intent(out), optional :: arr_max_levels_out(nflds) + !< output of number of levels of + !< integer expansion to used + + integer, intent(in), optional :: gbl_max_nsummands + !< maximum of nsummand over all + !< processes + + integer, intent(out), optional :: gbl_max_nsummands_out + !< calculated maximum nsummands + !< over all processes + + integer, intent(in), optional :: gbl_count + !< was total number of summands; + !< now is ignored; use + !< gbl_max_nsummands instead + + logical, intent(in), optional :: repro_sum_validate + !< flag enabling/disabling testing that gmax and max_levels are + !< accurate/sufficient. Default is enabled. + + integer, intent(inout), optional :: repro_sum_stats(5) + !< increment running totals for + !< (1) one-reduction repro_sum + !< (2) two-reduction repro_sum + !< (3) both types in one call + !< (4) nonrepro_sum + !< (5) global max nsummands reduction + + real(r8), intent(out), optional :: rel_diff(2,nflds) + !< relative and absolute + !< differences between fixed + !< and floating point sums + + integer, intent(in), optional :: commid + !< MPI communicator + +! Local workspace + + logical :: use_ddpdd_sum ! flag indicating whether to + ! use ice_reprosum_ddpdd or not + logical :: recompute ! flag indicating need to + ! determine gmax/gmin before + ! computing sum + logical :: validate ! flag indicating need to + ! verify gmax and max_levels + ! are accurate/sufficient + integer :: omp_nthreads ! number of OpenMP threads + integer :: mpi_comm ! MPI subcommunicator + integer :: tasks ! number of MPI processes + integer :: mype ! MPI task rank + integer :: ierr ! MPI error return + integer :: ifld, isum, ithread ! loop variables + integer :: max_nsummands ! max nsummands over all processes + ! or threads (used in both ways) + + integer, allocatable :: isum_beg(:), isum_end(:) + ! range of summand indices for each + ! OpenMP thread + integer, allocatable :: arr_tlmin_exp(:,:) + ! per thread local exponent minima + integer, allocatable :: arr_tlmax_exp(:,:) + ! per thread local exponent maxima + integer :: arr_exp, arr_exp_tlmin, arr_exp_tlmax + ! summand exponent and working min/max + integer :: arr_lmin_exp(nflds) ! local exponent minima + integer :: arr_lmax_exp(nflds) ! local exponent maxima + integer :: arr_lextremes(0:nflds,2)! local exponent extrema + integer :: arr_gextremes(0:nflds,2)! global exponent extrema + + integer :: arr_gmax_exp(nflds) ! global exponents maxima + integer :: arr_gmin_exp(nflds) ! global exponents minima + integer :: arr_max_shift ! maximum safe exponent for + ! value < 1 (so that sum does + ! not overflow) + integer :: max_levels(nflds) ! maximum number of levels of + ! integer expansion to use + integer :: max_level ! maximum value in max_levels + integer :: gbl_max_red ! global max local sum reduction? (0/1) + integer :: repro_sum_fast ! 1 reduction repro_sum? (0/1) + integer :: repro_sum_slow ! 2 reduction repro_sum? (0/1) + integer :: repro_sum_both ! both fast and slow? (0/1) + integer :: nonrepro_sum ! nonrepro_sum? (0/1) + + real(r8) :: xmax_nsummands ! dble of max_nsummands + real(r8) :: arr_lsum(nflds) ! local sums + real(r8) :: arr_gsum_fast(nflds) ! global sum calculated using + ! fast, nonreproducible, + ! floating point alg. + real(r8) :: abs_diff ! absolute difference between + ! fixed and floating point + ! sums +#ifdef _OPENMP + integer omp_get_max_threads + external omp_get_max_threads +#endif + character(len=*),parameter :: subname = '(ice_reprosum_calc)' + +!----------------------------------------------------------------------- + +! check whether should use ice_reprosum_ddpdd algorithm + use_ddpdd_sum = repro_sum_use_ddpdd + if ( present(ddpdd_sum) ) then + use_ddpdd_sum = ddpdd_sum + endif + +! check whether intrinsic-based algorithm will work on this system +! (requires floating point and integer bases to be the same) +! If not, always use ddpdd. + use_ddpdd_sum = use_ddpdd_sum .or. (radix(0._r8) /= radix(0_i8)) + +! initialize local statistics variables + gbl_max_red = 0 + repro_sum_fast = 0 + repro_sum_slow = 0 + repro_sum_both = 0 + nonrepro_sum = 0 + +! set MPI communicator + if ( present(commid) ) then + mpi_comm = commid + else +#ifdef SERIAL_REMOVE_MPI + mpi_comm = 0 +#else + mpi_comm = MPI_COMM_WORLD +#endif + endif + +! if (detailed_timing) then +! call xicex_timer_start('xicex_reprosum_prebarrier') +! call xicex_mpi_barrier(mpi_comm,subname) +! call xicex_timer_stop ('xicex_reprosum_prebarrier') +! endif + + if ( use_ddpdd_sum ) then + +! if (detailed_timing) call xicex_timer_start('ice_reprosum_ddpdd') + + call ice_reprosum_ddpdd(arr, arr_gsum, nsummands, dsummands, & + nflds, mpi_comm) + repro_sum_fast = 1 + +! if (detailed_timing) call xicex_timer_stop('ice_reprosum_ddpdd') + + else + +! if (detailed_timing) call xicex_timer_start('ice_reprosum_int') + +! get number of MPI tasks +#ifdef SERIAL_REMOVE_MPI + tasks = 1 + mype = 0 +#else + call mpi_comm_size(mpi_comm, tasks, ierr) + call mpi_comm_rank(mpi_comm, mype, ierr) +#endif + +! get number of OpenMP threads +#ifdef _OPENMP + omp_nthreads = omp_get_max_threads() +#else + omp_nthreads = 1 +#endif + +! see if have sufficient information to not require max/min allreduce + recompute = .true. + validate = .false. + if ( present(arr_gbl_max) .and. present(arr_max_levels) ) then + recompute = .false. + +! setting lower bound on max_level*nflds to be 64 to improve OpenMP +! performance for loopb in ice_reprosum_int + max_level = (64/nflds) + 1 + do ifld=1,nflds + if ((arr_gbl_max(ifld) .ge. 0.0_r8) .and. & + (arr_max_levels(ifld) > 0)) then + + arr_gmax_exp(ifld) = exponent(arr_gbl_max(ifld)) + if (max_level < arr_max_levels(ifld)) & + max_level = arr_max_levels(ifld) + + else + recompute = .true. + endif + enddo + + if (.not. recompute) then + +! determine maximum number of summands in local phases of the +! algorithm +! if (detailed_timing) call xicex_timer_start("repro_sum_allr_max") + if ( present(gbl_max_nsummands) ) then + if (gbl_max_nsummands < 1) then +#ifdef SERIAL_REMOVE_MPI + max_nsummands = nsummands +#else + call mpi_allreduce (nsummands, max_nsummands, 1, & + MPI_INTEGER, MPI_MAX, mpi_comm, ierr) +#endif + gbl_max_red = 1 + else + max_nsummands = gbl_max_nsummands + endif + else +#ifdef SERIAL_REMOVE_MPI + max_nsummands = nsummands +#else + call mpi_allreduce (nsummands, max_nsummands, 1, & + MPI_INTEGER, MPI_MAX, mpi_comm, ierr) +#endif + gbl_max_red = 1 + endif +! if (detailed_timing) call xicex_timer_stop("repro_sum_allr_max") + +! determine maximum shift. Shift needs to be small enough that summation +! does not exceed maximum number of digits in i8. + +! if requested, return max_nsummands before it is redefined + if ( present( gbl_max_nsummands_out) ) then + gbl_max_nsummands_out = max_nsummands + endif + +! summing within each thread first + max_nsummands = (max_nsummands/omp_nthreads) + 1 +! then over threads and tasks + max_nsummands = max(max_nsummands, tasks*omp_nthreads) + + xmax_nsummands = dble(max_nsummands) + arr_max_shift = digits(0_i8) - (exponent(xmax_nsummands) + 1) + if (arr_max_shift < 2) then + write(tmpstr,*) subname//' ERROR: number of summands too large for fixed precision algorithm' + call abort_ice(tmpstr,file=__FILE__,line=__LINE__) + endif + +! calculate sum + if (present(repro_sum_validate)) then + validate = repro_sum_validate + else + validate = .true. + endif + call ice_reprosum_int(arr, arr_gsum, nsummands, dsummands, & + nflds, arr_max_shift, arr_gmax_exp, & + arr_max_levels, max_level, validate, & + recompute, omp_nthreads, mpi_comm) + +! record statistics, etc. + repro_sum_fast = 1 + if (recompute) then + repro_sum_both = 1 + else +! if requested, return specified levels and upper bounds on maxima + if ( present(arr_max_levels_out) ) then + do ifld=1,nflds + arr_max_levels_out(ifld) = arr_max_levels(ifld) + enddo + endif + if ( present(arr_gbl_max_out) ) then + do ifld=1,nflds + arr_gbl_max_out(ifld) = arr_gbl_max(ifld) + enddo + endif + endif + endif + endif + +! do not have sufficient information; calculate global max/min and +! use to compute required number of levels + if (recompute) then + +! record statistic + repro_sum_slow = 1 + +! determine maximum and minimum (non-zero) summand values and +! maximum number of local summands + +! allocate thread-specific work space + allocate(arr_tlmax_exp(nflds,omp_nthreads)) + allocate(arr_tlmin_exp(nflds,omp_nthreads)) + allocate(isum_beg(omp_nthreads)) + allocate(isum_end(omp_nthreads)) + +! split summand index range over OpenMP threads + call split_indices(nsummands, omp_nthreads, isum_beg, isum_end) + +!$omp parallel do & +!$omp default(shared) & +!$omp private(ithread, ifld, isum, arr_exp, arr_exp_tlmin, arr_exp_tlmax) + do ithread=1,omp_nthreads +! if (detailed_timing) call xicex_timer_start('repro_sum_loopa') + do ifld=1,nflds + arr_exp_tlmin = MAXEXPONENT(1._r8) + arr_exp_tlmax = MINEXPONENT(1._r8) + do isum=isum_beg(ithread),isum_end(ithread) + if (arr(isum,ifld) .ne. 0.0_r8) then + arr_exp = exponent(arr(isum,ifld)) + arr_exp_tlmin = min(arr_exp,arr_exp_tlmin) + arr_exp_tlmax = max(arr_exp,arr_exp_tlmax) + endif + end do + arr_tlmin_exp(ifld,ithread) = arr_exp_tlmin + arr_tlmax_exp(ifld,ithread) = arr_exp_tlmax + end do +! if (detailed_timing) call xicex_timer_stop('repro_sum_loopa') + end do + + do ifld=1,nflds + arr_lmax_exp(ifld) = maxval(arr_tlmax_exp(ifld,:)) + arr_lmin_exp(ifld) = minval(arr_tlmin_exp(ifld,:)) + end do + deallocate(arr_tlmin_exp,arr_tlmax_exp,isum_beg,isum_end) + + arr_lextremes(0,:) = -nsummands + arr_lextremes(1:nflds,1) = -arr_lmax_exp(:) + arr_lextremes(1:nflds,2) = arr_lmin_exp(:) +! if (detailed_timing) call xicex_timer_start("repro_sum_allr_minmax") +#ifdef SERIAL_REMOVE_MPI + arr_gextremes = arr_lextremes +#else + call mpi_allreduce (arr_lextremes, arr_gextremes, 2*(nflds+1), & + MPI_INTEGER, MPI_MIN, mpi_comm, ierr) +#endif +! if (detailed_timing) call xicex_timer_stop("repro_sum_allr_minmax") + max_nsummands = -arr_gextremes(0,1) + arr_gmax_exp(:) = -arr_gextremes(1:nflds,1) + arr_gmin_exp(:) = arr_gextremes(1:nflds,2) + +! if a field is identically zero, arr_gmin_exp still equals MAXEXPONENT +! and arr_gmax_exp still equals MINEXPONENT. In this case, set +! arr_gmin_exp = arr_gmax_exp = MINEXPONENT + do ifld=1,nflds + arr_gmin_exp(ifld) = min(arr_gmax_exp(ifld),arr_gmin_exp(ifld)) + enddo + +! if requested, return upper bounds on observed maxima + if ( present(arr_gbl_max_out) ) then + do ifld=1,nflds + arr_gbl_max_out(ifld) = scale(1.0_r8,arr_gmax_exp(ifld)) + enddo + endif + +! if requested, return max_nsummands before it is redefined + if ( present( gbl_max_nsummands_out) ) then + gbl_max_nsummands_out = max_nsummands + endif + +! determine maximum shift (same as in previous branch, but with calculated +! max_nsummands). Shift needs to be small enough that summation does not +! exceed maximum number of digits in i8. + +! summing within each thread first + max_nsummands = (max_nsummands/omp_nthreads) + 1 +! then over threads and tasks + max_nsummands = max(max_nsummands, tasks*omp_nthreads) + + xmax_nsummands = dble(max_nsummands) + arr_max_shift = digits(0_i8) - (exponent(xmax_nsummands) + 1) + if (arr_max_shift < 2) then + write(tmpstr,*) subname//' ERROR: number of summands too large for fixed precision algorithm' + call abort_ice(tmpstr,file=__FILE__,line=__LINE__) + endif + +! determine maximum number of levels required for each field +! ((digits(0_i8) + (arr_gmax_exp(ifld)-arr_gmin_exp(ifld))) / arr_max_shift) +! + 1 because first truncation probably does not involve a maximal shift +! + 1 to guarantee that the integer division rounds up (not down) +! (setting lower bound on max_level*nflds to be 64 to improve OpenMP +! performance for loopb in ice_reprosum_int) + max_level = (64/nflds) + 1 + do ifld=1,nflds + max_levels(ifld) = 2 + & + ((digits(0_i8) + (arr_gmax_exp(ifld)-arr_gmin_exp(ifld))) & + / arr_max_shift) + if ( present(arr_max_levels) .and. (.not. validate) ) then +! if validate true, then computation with arr_max_levels failed +! previously + if ( arr_max_levels(ifld) > 0 ) then + max_levels(ifld) = & + min(arr_max_levels(ifld),max_levels(ifld)) + endif + endif + if (max_level < max_levels(ifld)) & + max_level = max_levels(ifld) + enddo + +! if requested, return calculated levels + if ( present(arr_max_levels_out) ) then + do ifld=1,nflds + arr_max_levels_out(ifld) = max_levels(ifld) + enddo + endif + +! calculate sum + validate = .false. + call ice_reprosum_int(arr, arr_gsum, nsummands, dsummands, nflds, & + arr_max_shift, arr_gmax_exp, max_levels, & + max_level, validate, recompute, & + omp_nthreads, mpi_comm) + + endif + +! if (detailed_timing) call xicex_timer_stop('ice_reprosum_int') + + endif + +! compare fixed and floating point results + if ( present(rel_diff) ) then + if (ice_reprosum_reldiffmax >= 0.0_r8) then + +! if (detailed_timing) then +! call xicex_timer_start('xicex_nonreprosum_prebarrier') +! call xicex_mpi_barrier(mpi_comm,subname) +! call xicex_timer_stop ('xicex_nonreprosum_prebarrier') +! endif + +! if (detailed_timing) call xicex_timer_start('nonrepro_sum') +! record statistic + nonrepro_sum = 1 +! compute nonreproducible sum + arr_lsum(:) = 0._r8 +!$omp parallel do & +!$omp default(shared) & +!$omp private(ifld, isum) + do ifld=1,nflds + do isum=1,nsummands + arr_lsum(ifld) = arr(isum,ifld) + arr_lsum(ifld) + end do + end do + +#ifdef SERIAL_REMOVE_MPI + arr_gsum_fast = arr_lsum +#else + call mpi_allreduce (arr_lsum, arr_gsum_fast, nflds, & + MPI_REAL8, MPI_SUM, mpi_comm, ierr) +#endif + +! if (detailed_timing) call xicex_timer_stop('nonrepro_sum') + +! determine differences +!$omp parallel do & +!$omp default(shared) & +!$omp private(ifld, abs_diff) + do ifld=1,nflds + abs_diff = abs(arr_gsum_fast(ifld)-arr_gsum(ifld)) + if (abs(arr_gsum(ifld)) > abs_diff) then + rel_diff(1,ifld) = abs_diff/abs(arr_gsum(ifld)) + else + rel_diff(1,ifld) = abs_diff + endif + rel_diff(2,ifld) = abs_diff + enddo + else + rel_diff(:,:) = 0.0_r8 + endif + endif + +! return statistics + if ( present(repro_sum_stats) ) then + repro_sum_stats(1) = repro_sum_stats(1) + repro_sum_fast + repro_sum_stats(2) = repro_sum_stats(2) + repro_sum_slow + repro_sum_stats(3) = repro_sum_stats(3) + repro_sum_both + repro_sum_stats(4) = repro_sum_stats(4) + nonrepro_sum + repro_sum_stats(5) = repro_sum_stats(5) + gbl_max_red + endif + + + end subroutine ice_reprosum_calc + +!======================================================================== +! +! Purpose: +!> Compute the global sum of each field in "arr" using the indicated +!> communicator with a reproducible yet scalable implementation based +!> on a fixed point algorithm. The accuracy of the fixed point algorithm +!> is controlled by the number of "levels" of integer expansion, the +!> maximum value of which is specified by max_level. +! +!---------------------------------------------------------------------- + + subroutine ice_reprosum_int (arr, arr_gsum, nsummands, dsummands, nflds, & + arr_max_shift, arr_gmax_exp, max_levels, & + max_level, validate, recompute, & + omp_nthreads, mpi_comm ) + +!---------------------------------------------------------------------- + +! Arguments + + integer, intent(in) :: nsummands !< number of local summands + integer, intent(in) :: dsummands !< declared first dimension + integer, intent(in) :: nflds !< number of fields + integer, intent(in) :: arr_max_shift !< maximum safe exponent for + !< value < 1 (so that sum + !< does not overflow) + integer, intent(in) :: arr_gmax_exp(nflds) + !< exponents of global maxima + integer, intent(in) :: max_levels(nflds) + !< maximum number of levels + !< of integer expansion + integer, intent(in) :: max_level !< maximum value in + !< max_levels + integer, intent(in) :: omp_nthreads !< number of OpenMP threads + integer, intent(in) :: mpi_comm !< MPI subcommunicator + + real(r8), intent(in) :: arr(dsummands,nflds) + !< input array + + logical, intent(in):: validate + !< flag indicating that accuracy of solution generated from + !< arr_gmax_exp and max_levels should be tested + + logical, intent(out):: recompute + !< flag indicating that either the upper bounds are inaccurate, + !< or max_levels and arr_gmax_exp do not generate accurate + !< enough sums + + real(r8), intent(out):: arr_gsum(nflds) !< global means + +! Local workspace + + integer, parameter :: max_jlevel = & + 1 + (digits(0_i8)/digits(0.0_r8)) + + integer(i8) :: i8_arr_tlsum_level(0:max_level,nflds,omp_nthreads) + ! integer vector representing local + ! sum (per thread, per field) + integer(i8) :: i8_arr_lsum_level((max_level+3)*nflds) + ! integer vector representing local + ! sum + integer(i8) :: i8_arr_level ! integer part of summand for current + ! expansion level + integer(i8) :: i8_arr_gsum_level((max_level+3)*nflds) + ! integer vector representing global + ! sum + integer(i8) :: IX_8 ! integer representation of current + ! jlevels of X_8 ('part' of + ! i8_arr_gsum_level) + integer(i8) :: i8_sign ! sign global sum + integer(i8) :: i8_radix ! radix for i8 variables + + integer :: max_error(nflds,omp_nthreads) + ! accurate upper bound on data? + integer :: not_exact(nflds,omp_nthreads) + ! max_levels sufficient to + ! capture all digits? + integer :: isum_beg(omp_nthreads), isum_end(omp_nthreads) + ! range of summand indices for each + ! OpenMP thread + integer :: ifld, isum, ithread + ! loop variables + integer :: arr_exp ! exponent of summand + integer :: arr_shift ! exponent used to generate integer + ! for current expansion level + integer :: ilevel ! current integer expansion level + integer :: offset(nflds) ! beginning location in + ! i8_arr_{g,l}sum_level for integer + ! expansion of current ifld + integer :: voffset ! modification to offset used to + ! include validation metrics + integer :: ioffset ! offset(ifld) + integer :: jlevel ! number of floating point 'pieces' + ! extracted from a given i8 integer + integer :: ierr ! MPI error return + integer :: LX(max_jlevel) ! exponent of X_8 (see below) + integer :: veclth ! total length of i8_arr_lsum_level + integer :: sum_digits ! lower bound on number of significant + ! in integer expansion of sum + integer :: curr_exp ! exponent of partial sum during + ! reconstruction from integer vector + integer :: corr_exp ! exponent of current summand in + ! reconstruction from integer vector + + real(r8) :: arr_frac ! fraction of summand + real(r8) :: arr_remainder ! part of summand remaining after + ! current level of integer expansion + real(r8) :: X_8(max_jlevel) ! r8 vector representation of current + ! i8_arr_gsum_level + real(r8) :: RX_8 ! r8 representation of difference + ! between current i8_arr_gsum_level + ! and current jlevels of X_8 + ! (== IX_8). Also used in final + ! scaling step + + logical :: first ! flag used to indicate that just + ! beginning reconstruction of sum + ! from integer vector + + character(len=*),parameter :: subname = '(ice_reprosum_int)' + +!----------------------------------------------------------------------- +! Save radix of i8 variables in an i8 variable + i8_radix = radix(IX_8) + +! If validating upper bounds, reserve space for validation metrics +! In both cases, reserve an extra level for overflow from the top level + if (validate) then + voffset = 3 + else + voffset = 1 + endif + +! compute offsets for each field + offset(1) = voffset + do ifld=2,nflds + offset(ifld) = offset(ifld-1) & + + (max_levels(ifld-1) + voffset) + enddo + veclth = offset(nflds) + max_levels(nflds) + +! split summand index range over OpenMP threads + call split_indices(nsummands, omp_nthreads, isum_beg, isum_end) + +! convert local summands to vector of integers and sum +! (Using scale instead of set_exponent because arr_remainder may not be +! "normal" after level 1 calculation) + i8_arr_lsum_level(:) = 0_i8 + +!$omp parallel do & +!$omp default(shared) & +!$omp private(ithread, ifld, ioffset, isum, arr_frac, arr_exp, & +!$omp arr_shift, ilevel, i8_arr_level, arr_remainder, RX_8, IX_8) + do ithread=1,omp_nthreads +! if (detailed_timing) call xicex_timer_start('repro_sum_loopb') + do ifld=1,nflds + ioffset = offset(ifld) + + max_error(ifld,ithread) = 0 + not_exact(ifld,ithread) = 0 + + i8_arr_tlsum_level(:,ifld,ithread) = 0_i8 + do isum=isum_beg(ithread),isum_end(ithread) + arr_remainder = 0.0_r8 + + if (arr(isum,ifld) .ne. 0.0_r8) then + arr_exp = exponent(arr(isum,ifld)) + arr_frac = fraction(arr(isum,ifld)) + +! test that global maximum upper bound is an upper bound + if (arr_exp > arr_gmax_exp(ifld)) then + max_error(ifld,ithread) = 1 + exit + endif + +! calculate first shift + arr_shift = arr_max_shift - (arr_gmax_exp(ifld)-arr_exp) + +! determine first (probably) nonzero level (assuming initial fraction is +! 'normal' - algorithm still works if this is not true) +! NOTE: this is critical; scale will set to zero if min exponent is too small. + if (arr_shift < 1) then + ilevel = (1 + (arr_gmax_exp(ifld)-arr_exp))/arr_max_shift + arr_shift = ilevel*arr_max_shift - (arr_gmax_exp(ifld)-arr_exp) + + do while (arr_shift < 1) + arr_shift = arr_shift + arr_max_shift + ilevel = ilevel + 1 + enddo + else + ilevel = 1 + endif + + if (ilevel .le. max_levels(ifld)) then +! apply first shift/truncate, add it to the relevant running +! sum, and calculate the remainder. + arr_remainder = scale(arr_frac,arr_shift) + i8_arr_level = int(arr_remainder,i8) + i8_arr_tlsum_level(ilevel,ifld,ithread) = & + i8_arr_tlsum_level(ilevel,ifld,ithread) + i8_arr_level + arr_remainder = arr_remainder - i8_arr_level + +! while the remainder is non-zero, continue to shift, truncate, +! sum, and calculate new remainder + do while ((arr_remainder .ne. 0.0_r8) & + .and. (ilevel < max_levels(ifld))) + ilevel = ilevel + 1 + arr_remainder = scale(arr_remainder,arr_max_shift) + i8_arr_level = int(arr_remainder,i8) + i8_arr_tlsum_level(ilevel,ifld,ithread) = & + i8_arr_tlsum_level(ilevel,ifld,ithread) + i8_arr_level + arr_remainder = arr_remainder - i8_arr_level + enddo + + endif + endif + + if (arr_remainder .ne. 0.0_r8) then + not_exact(ifld,ithread) = 1 + endif + + enddo + +! postprocess integer vector to eliminate potential for overlap in the following +! sums over threads and processes: if value larger than or equal to +! (radix(IX_8)**arr_max_shift), add this 'overlap' to next larger integer in +! vector, resulting in nonoverlapping ranges for each component. Note that +! "ilevel-1==0" corresponds to an extra level used to guarantee that the sums +! over threads and processes do not overflow for ilevel==1. + do ilevel=max_levels(ifld),1,-1 + RX_8 = i8_arr_tlsum_level(ilevel,ifld,ithread) + IX_8 = int(scale(RX_8,-arr_max_shift),i8) + if (IX_8 .ne. 0_i8) then + i8_arr_tlsum_level(ilevel-1,ifld,ithread) = & + i8_arr_tlsum_level(ilevel-1,ifld,ithread) + IX_8 + IX_8 = IX_8*(i8_radix**arr_max_shift) + i8_arr_tlsum_level(ilevel,ifld,ithread) = & + i8_arr_tlsum_level(ilevel,ifld,ithread) - IX_8 + endif + enddo + enddo +! if (detailed_timing) call xicex_timer_stop('repro_sum_loopb') + enddo + +! sum contributions from different threads + do ifld=1,nflds + ioffset = offset(ifld) + do ithread = 1,omp_nthreads + do ilevel = 0,max_levels(ifld) + i8_arr_lsum_level(ioffset+ilevel) = & + i8_arr_lsum_level(ioffset+ilevel) & + + i8_arr_tlsum_level(ilevel,ifld,ithread) + enddo + enddo + enddo + +! record if upper bound was inaccurate or if level expansion stopped +! before full accuracy was achieved + if (validate) then + do ifld=1,nflds + ioffset = offset(ifld) + i8_arr_lsum_level(ioffset-voffset+1) = maxval(max_error(ifld,:)) + i8_arr_lsum_level(ioffset-voffset+2) = maxval(not_exact(ifld,:)) + enddo + endif + +! sum integer vector element-wise +#ifdef SERIAL_REMOVE_MPI + i8_arr_gsum_level = i8_arr_lsum_level +#else +#if ( defined noI8 ) + ! Workaround for when i8 is not supported. +! if (detailed_timing) call xicex_timer_start("repro_sum_allr_i4") + call mpi_allreduce (i8_arr_lsum_level, i8_arr_gsum_level, & + veclth, MPI_INTEGER, MPI_SUM, mpi_comm, ierr) +! if (detailed_timing) call xicex_timer_stop("repro_sum_allr_i4") +#else +! if (detailed_timing) call xicex_timer_start("repro_sum_allr_i8") + call mpi_allreduce (i8_arr_lsum_level, i8_arr_gsum_level, & + veclth, MPI_INTEGER8, MPI_SUM, mpi_comm, ierr) +! if (detailed_timing) call xicex_timer_stop("repro_sum_allr_i8") +#endif +#endif + +! Construct global sum from integer vector representation: +! 1) arr_max_shift is the shift applied to fraction(arr_gmax) . +! When shifting back, need to "add back in" true arr_gmax exponent. This was +! removed implicitly by working only with the fraction . +! 2) want to add levels into sum in reverse order (smallest to largest). However, +! even this can generate floating point rounding errors if signs of integers +! alternate. To avoid this, do some arithmetic with integer vectors so that all +! components have the same sign. This should keep relative difference between +! using different integer sizes (e.g. i8 and i4) to machine epsilon +! 3) assignment to X_8 will usually lose accuracy since maximum integer +! size is greater than the max number of 'digits' in r8 value (if xmax_nsummands +! correction not very large). Calculate remainder and add in first (since +! smaller). One correction is sufficient for r8 (53 digits) and i8 (63 digits). +! For r4 (24 digits) may need to correct twice. Code is written in a general +! fashion, to work no matter how many corrections are necessary (assuming +! max_jlevel parameter calculation is correct). + + recompute = .false. + do ifld=1,nflds + arr_gsum(ifld) = 0.0_r8 + ioffset = offset(ifld) + +! if validate is .true., test whether the summand upper bound +! was exceeded on any of the processes + if (validate) then + if (i8_arr_gsum_level(ioffset-voffset+1) .ne. 0_i8) then + recompute = .true. + endif + endif + + if (.not. recompute) then + +! preprocess integer vector: +! a) if value larger than or equal to (radix(IX_8)**arr_max_shift), add this 'overlap' +! to next larger integer in vector, resulting in nonoverlapping ranges for each +! component. Note that have "ilevel-1=0" level here as described above. + do ilevel=max_levels(ifld),1,-1 + RX_8 = i8_arr_gsum_level(ioffset+ilevel) + IX_8 = int(scale(RX_8,-arr_max_shift),i8) + if (IX_8 .ne. 0_i8) then + i8_arr_gsum_level(ioffset+ilevel-1) = i8_arr_gsum_level(ioffset+ilevel-1) & + + IX_8 + IX_8 = IX_8*(i8_radix**arr_max_shift) + i8_arr_gsum_level(ioffset+ilevel) = i8_arr_gsum_level(ioffset+ilevel) & + - IX_8 + endif + enddo +! b) subtract +/- 1 from larger and add +/- 1 to smaller when necessary +! so that all vector components have the same sign (eliminating loss +! of accuracy arising from difference of large values when +! reconstructing r8 sum from integer vector) + ilevel = 0 + do while ((i8_arr_gsum_level(ioffset+ilevel) .eq. 0_i8) & + .and. (ilevel < max_levels(ifld))) + ilevel = ilevel + 1 + enddo +! + if (ilevel < max_levels(ifld)) then + if (i8_arr_gsum_level(ioffset+ilevel) > 0_i8) then + i8_sign = 1_i8 + else + i8_sign = -1_i8 + endif + do jlevel=ilevel,max_levels(ifld)-1 + if (sign(1_i8,i8_arr_gsum_level(ioffset+jlevel)) & + .ne. sign(1_i8,i8_arr_gsum_level(ioffset+jlevel+1))) then + i8_arr_gsum_level(ioffset+jlevel) = i8_arr_gsum_level(ioffset+jlevel) & + - i8_sign + i8_arr_gsum_level(ioffset+jlevel+1) = i8_arr_gsum_level(ioffset+jlevel+1) & + + i8_sign*(i8_radix**arr_max_shift) + endif + enddo + endif + +! start with maximum shift, and work up to larger values + arr_shift = arr_gmax_exp(ifld) & + - max_levels(ifld)*arr_max_shift + curr_exp = 0 + first = .true. + do ilevel=max_levels(ifld),0,-1 + + if (i8_arr_gsum_level(ioffset+ilevel) .ne. 0_i8) then + jlevel = 1 + +! r8 representation of higher order bits in integer + X_8(jlevel) = i8_arr_gsum_level(ioffset+ilevel) + LX(jlevel) = exponent(X_8(jlevel)) + +! calculate remainder + IX_8 = int(X_8(jlevel),i8) + RX_8 = (i8_arr_gsum_level(ioffset+ilevel) - IX_8) + +! repeat using remainder + do while (RX_8 .ne. 0.0_r8) + jlevel = jlevel + 1 + X_8(jlevel) = RX_8 + LX(jlevel) = exponent(RX_8) + IX_8 = IX_8 + int(RX_8,i8) + RX_8 = (i8_arr_gsum_level(ioffset+ilevel) - IX_8) + enddo + +! add in contributions, smaller to larger, rescaling for each +! addition to guarantee that exponent of working summand is always +! larger than minexponent + do while (jlevel > 0) + if (first) then + curr_exp = LX(jlevel) + arr_shift + arr_gsum(ifld) = fraction(X_8(jlevel)) + first = .false. + else + corr_exp = curr_exp - (LX(jlevel) + arr_shift) + arr_gsum(ifld) = fraction(X_8(jlevel)) & + + scale(arr_gsum(ifld),corr_exp) + curr_exp = LX(jlevel) + arr_shift + endif + jlevel = jlevel - 1 + enddo + + endif + + arr_shift = arr_shift + arr_max_shift + enddo + +! apply final exponent correction, scaling first if exponent is too small +! to apply directly + corr_exp = curr_exp + exponent(arr_gsum(ifld)) + if (corr_exp .ge. MINEXPONENT(1._r8)) then + arr_gsum(ifld) = set_exponent(arr_gsum(ifld),corr_exp) + else + RX_8 = set_exponent(arr_gsum(ifld), & + corr_exp-MINEXPONENT(1._r8)) + arr_gsum(ifld) = scale(RX_8,MINEXPONENT(1._r8)) + endif + +! if validate is .true. and some precision lost, test whether 'too much' +! was lost, due to too loose an upper bound, too stringent a limit on number +! of levels of expansion, cancellation, .... Calculated by comparing lower +! bound on number of sigificant digits with number of digits in 1.0_r8 . + if (validate) then + if (i8_arr_gsum_level(ioffset-voffset+2) .ne. 0_i8) then + +! find first nonzero level and use exponent for this level, then assume all +! subsequent levels contribute arr_max_shift digits. + sum_digits = 0 + do ilevel=0,max_levels(ifld) + if (sum_digits .eq. 0) then + if (i8_arr_gsum_level(ioffset+ilevel) .ne. 0_i8) then + X_8(1) = i8_arr_gsum_level(ioffset+ilevel) + LX(1) = exponent(X_8(1)) + sum_digits = LX(1) + endif + else + sum_digits = sum_digits + arr_max_shift + endif + enddo + + if (sum_digits < digits(1.0_r8)) then + recompute = .true. + endif + endif + endif + + endif + + enddo + + + end subroutine ice_reprosum_int + +!======================================================================== +! +! Purpose: +!> Test whether distributed sum exceeds tolerance and print out a +!> warning message. +! +!---------------------------------------------------------------------- + + logical function ice_reprosum_tolExceeded (name, nflds, master, & + logunit, rel_diff ) +!---------------------------------------------------------------------- + +! Arguments + + character(len=*), intent(in) :: name !< distributed sum identifier + integer, intent(in) :: nflds !< number of fields + logical, intent(in) :: master !< process that will write + !< warning messages? + integer, optional, intent(in) :: logunit!< unit warning messages + !< written to + real(r8), intent(in) :: rel_diff(2,nflds) + !< relative and absolute + !< differences between fixed + !< and floating point sums + +! Local workspace + + integer :: llogunit ! local log unit + integer :: ifld ! field index + integer :: exceeds_limit ! number of fields whose + ! sum exceeds tolerance + real(r8) :: max_rel_diff ! maximum relative difference + integer :: max_rel_diff_idx ! field index for max. rel. diff. + real(r8) :: max_abs_diff ! maximum absolute difference + integer :: max_abs_diff_idx ! field index for max. abs. diff. + character(len=*),parameter :: subname = '(ice_reprosum_tolExceeded)' + +!----------------------------------------------------------------------- + + ice_reprosum_tolExceeded = .false. + if (ice_reprosum_reldiffmax < 0.0_r8) return + + if ( present(logunit) ) then + llogunit = logunit + else + llogunit = nu_diag + endif + + ! check that "fast" reproducible sum is accurate enough. + exceeds_limit = 0 + max_rel_diff = 0.0_r8 + max_abs_diff = 0.0_r8 + do ifld=1,nflds + if (rel_diff(1,ifld) > ice_reprosum_reldiffmax) then + exceeds_limit = exceeds_limit + 1 + if (rel_diff(1,ifld) > max_rel_diff) then + max_rel_diff = rel_diff(1,ifld) + max_rel_diff_idx = ifld + endif + if (rel_diff(2,ifld) > max_abs_diff) then + max_abs_diff = rel_diff(2,ifld) + max_abs_diff_idx = ifld + endif + endif + enddo + + if (exceeds_limit > 0) then + if (master) then + write(llogunit,*) subname,trim(name), & + ': difference in fixed and floating point sums ', & + ' exceeds tolerance in ', exceeds_limit, & + ' fields.' + write(llogunit,*) subname,' Maximum relative diff: (rel)', & + rel_diff(1,max_rel_diff_idx), ' (abs) ', & + rel_diff(2,max_rel_diff_idx) + write(llogunit,*) subname,' Maximum absolute diff: (rel)', & + rel_diff(1,max_abs_diff_idx), ' (abs) ', & + rel_diff(2,max_abs_diff_idx) + endif + ice_reprosum_tolExceeded = .true. + endif + + + end function ice_reprosum_tolExceeded + +!======================================================================== +! +! Purpose: +!> Compute the global sum of each field in "arr" using the indicated +!> communicator with a reproducible yet scalable implementation based +!> on He and Ding's implementation of the double-double algorithm. +! +!---------------------------------------------------------------------- + + subroutine ice_reprosum_ddpdd (arr, arr_gsum, nsummands, dsummands, & + nflds, mpi_comm ) +!---------------------------------------------------------------------- + +! Arguments + + integer, intent(in) :: nsummands !< number of local summands + integer, intent(in) :: dsummands !< declared first dimension + integer, intent(in) :: nflds !< number of fields + real(r8), intent(in) :: arr(dsummands,nflds) + !< input array + integer, intent(in) :: mpi_comm !< MPI subcommunicator + + real(r8), intent(out):: arr_gsum(nflds) + !< global sums + + +! Local workspace + + integer :: old_cw ! for x86 processors, save + ! current arithmetic mode + integer :: ifld, isum ! loop variables + integer :: ierr ! MPI error return + + real(r8) :: e, t1, t2 ! temporaries + complex(r8) :: arr_lsum_dd(nflds) ! local sums (in double-double + ! format) + complex(r8) :: arr_gsum_dd(nflds) ! global sums (in double-double + ! format) + + integer, save :: mpi_sumdd + logical, save :: first_time = .true. + character(len=*),parameter :: subname = '(ice_reprosum_ddpdd)' + +!----------------------------------------------------------------------- + + call ice_shr_reprosumx86_fix_start (old_cw) + + if (first_time) then +#ifdef SERIAL_REMOVE_MPI + mpi_sumdd = 0 +#else + call mpi_op_create(ddpdd, .true., mpi_sumdd, ierr) +#endif + first_time = .false. + endif + + do ifld=1,nflds + arr_lsum_dd(ifld) = (0.0_r8,0.0_r8) + + do isum=1,nsummands + + ! Compute arr(isum,ifld) + arr_lsum_dd(ifld) using Knuth''s + ! trick. + t1 = arr(isum,ifld) + real(arr_lsum_dd(ifld)) + e = t1 - arr(isum,ifld) + t2 = ((real(arr_lsum_dd(ifld)) - e) & + + (arr(isum,ifld) - (t1 - e))) & + + aimag(arr_lsum_dd(ifld)) + + ! The result is t1 + t2, after normalization. + arr_lsum_dd(ifld) = cmplx ( t1 + t2, t2 - ((t1 + t2) - t1), r8 ) + enddo + + enddo + +#ifdef SERIAL_REMOVE_MPI + arr_gsum_dd = arr_lsum_dd +#else + call mpi_allreduce (arr_lsum_dd, arr_gsum_dd, nflds, & + MPI_COMPLEX16, mpi_sumdd, mpi_comm, ierr) +#endif + do ifld=1,nflds + arr_gsum(ifld) = real(arr_gsum_dd(ifld)) + enddo + + call ice_shr_reprosumx86_fix_end (old_cw) + + end subroutine ice_reprosum_ddpdd + +!----------------------------------------------------------------------- + + subroutine DDPDD (dda, ddb, len, itype) +!---------------------------------------------------------------------- +! +! Purpose: +! Modification of original codes written by David H. Bailey +! This subroutine computes ddb(i) = dda(i)+ddb(i) +! +!---------------------------------------------------------------------- + +! Arguments + + integer, intent(in) :: len ! array length + complex(r8), intent(in) :: dda(len) ! input + complex(r8), intent(inout) :: ddb(len) ! result + integer, intent(in) :: itype ! unused + +! Local workspace + + real(r8) e, t1, t2 + integer i + character(len=*),parameter :: subname = '(ice_reprosum_mod:DDPDD)' + +!----------------------------------------------------------------------- + + do i = 1, len +! Compute dda + ddb using Knuth's trick. + t1 = real(dda(i)) + real(ddb(i)) + e = t1 - real(dda(i)) + t2 = ((real(ddb(i)) - e) + (real(dda(i)) - (t1 - e))) & + + aimag(dda(i)) + aimag(ddb(i)) + +! The result is t1 + t2, after normalization. + ddb(i) = cmplx ( t1 + t2, t2 - ((t1 + t2) - t1), r8 ) + enddo + + + end subroutine DDPDD + +!----------------------------------------------------------------------- + + subroutine split_indices(total,num_pieces,ibeg,iend) +!---------------------------------------------------------------------- +! +! Purpose: +! Split range into 'num_pieces' +! +!---------------------------------------------------------------------- + +! Arguments + + integer, intent(in) :: total + integer, intent(in) :: num_pieces + integer, intent(out) :: ibeg(num_pieces), iend(num_pieces) + +! Local workspace + + integer :: itmp1, itmp2, ioffset, i + character(len=*),parameter :: subname = '(ice_reprosum_mod:split_indices)' + +!----------------------------------------------------------------------- + + itmp1 = total/num_pieces + itmp2 = mod(total,num_pieces) + ioffset = 0 + do i=1,itmp2 + ibeg(i) = ioffset + 1 + iend(i) = ioffset + (itmp1+1) + ioffset = iend(i) + enddo + do i=itmp2+1,num_pieces + ibeg(i) = ioffset + 1 + if (ibeg(i) > total) then + iend(i) = ibeg(i) - 1 + else + iend(i) = ioffset + itmp1 + ioffset = iend(i) + endif + enddo + + end subroutine split_indices + +!======================================================================== + +end module ice_reprosum diff --git a/cicecore/cicedynB/infrastructure/comm/serial/ice_global_reductions.F90 b/cicecore/cicedynB/infrastructure/comm/serial/ice_global_reductions.F90 index 6768d67bf..d539f7246 100644 --- a/cicecore/cicedynB/infrastructure/comm/serial/ice_global_reductions.F90 +++ b/cicecore/cicedynB/infrastructure/comm/serial/ice_global_reductions.F90 @@ -1,4 +1,5 @@ !||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| +#define SERIAL_REMOVE_MPI module ice_global_reductions @@ -8,19 +9,34 @@ module ice_global_reductions ! author: Phil Jones, LANL ! Oct. 2004: Adapted from POP version by William H. Lipscomb, LANL ! Feb. 2008: Updated from POP version by Elizabeth C. Hunke, LANL +! Aug. 2014: Added bit-for-bit reproducible options for global_sum_dbl +! and global_sum_prod_dbl by T Craig NCAR +! Mar. 2019: Refactored bit-for-bit option, T Craig use ice_kinds_mod + use ice_blocks, only: block, get_block, nx_block, ny_block +#ifdef SERIAL_REMOVE_MPI + use ice_communicate, only: my_task, master_task +#else + use ice_communicate, only: my_task, mpiR16, mpiR8, mpiR4, master_task +#endif use ice_constants, only: field_loc_Nface, field_loc_NEcorner - use ice_blocks, only: block, get_block + use ice_fileunits, only: bfbflag + use ice_exit, only: abort_ice use ice_distribution, only: distrb, ice_distributionGet, & ice_distributionGetBlockID - use ice_domain_size, only: nx_global - use ice_exit, only: abort_ice + use ice_domain_size, only: nx_global, ny_global, max_blocks + use ice_gather_scatter, only: gather_global use icepack_intfc, only: icepack_warnings_flush, icepack_warnings_aborted + use ice_reprosum, only: ice_reprosum_calc implicit none private +#ifndef SERIAL_REMOVE_MPI + include 'mpif.h' +#endif + public :: global_sum, & global_sum_prod, & global_maxval, & @@ -85,7 +101,7 @@ function global_sum_dbl(array, dist, field_loc, mMask, lMask) & array ! array to be summed type (distrb), intent(in) :: & - dist ! block distribution for array + dist ! block distribution for array X integer (int_kind), intent(in) :: & field_loc ! location of field on staggered grid @@ -105,25 +121,24 @@ function global_sum_dbl(array, dist, field_loc, mMask, lMask) & ! !----------------------------------------------------------------------- -#ifdef REPRODUCIBLE - real (r16_kind) :: & - localSum, &! sum of local block domain - globalSumTmp ! higher precision global sum -#else - real (dbl_kind) :: & - localSum ! sum of local block domain -#endif - integer (int_kind) :: & - i,j,iblock, &! local counters + i,j,iblock,n, &! local counters ib,ie,jb,je, &! beg,end of physical domain blockID, &! block location + numProcs, &! number of processor participating numBlocks, &! number of local blocks + communicator, &! communicator for this distribution maxiglob ! maximum non-redundant value of i_global logical (log_kind) :: & Nrow ! this field is on a N row (a velocity row) + real (dbl_kind), dimension(:,:), allocatable :: & + work ! temporary local array + + real (dbl_kind), dimension(:), allocatable :: & + sums ! array of sums + type (block) :: & this_block ! holds local block information @@ -131,14 +146,20 @@ function global_sum_dbl(array, dist, field_loc, mMask, lMask) & !----------------------------------------------------------------------- -#ifdef REPRODUCIBLE - globalSumTmp = 0.0_r16_kind -#else - globalSum = 0.0_dbl_kind -#endif call ice_distributionGet(dist, & - numLocalBlocks = numBlocks) + numLocalBlocks = numBlocks, & + nprocs = numProcs, & + communicator = communicator) + + if (numBlocks == 0) then + allocate(work(1,1)) + else + allocate(work(nx_block*ny_block*numBlocks,1)) + endif + allocate(sums(1)) + work = 0.0_dbl_kind + sums = 0.0_dbl_kind do iblock=1,numBlocks call ice_distributionGetBlockID(dist, iblock, blockID) @@ -150,42 +171,13 @@ function global_sum_dbl(array, dist, field_loc, mMask, lMask) & jb = this_block%jlo je = this_block%jhi -#ifdef REPRODUCIBLE - localSum = 0.0_r16_kind -#else - localSum = 0.0_dbl_kind -#endif - - if (present(mMask)) then - do j=jb,je - do i=ib,ie - localSum = & - localSum + array(i,j,iblock)*mMask(i,j,iblock) - end do - end do - else if (present(lMask)) then - do j=jb,je - do i=ib,ie - if (lMask(i,j,iblock)) then - localSum = & - localSum + array(i,j,iblock) - endif - end do - end do - else - do j=jb,je - do i=ib,ie - localSum = localSum + array(i,j,iblock) - end do - end do - endif - !*** if this row along or beyond tripole boundary !*** must eliminate redundant points from global sum + maxiglob = -1 if (this_block%tripole) then Nrow=(field_loc == field_loc_Nface .or. & - field_loc == field_loc_NEcorner) + field_loc == field_loc_NEcorner) if (Nrow .and. this_block%tripoleTFlag) then maxiglob = 0 ! entire u-row on T-fold grid elseif (Nrow .or. this_block%tripoleTFlag) then @@ -193,49 +185,37 @@ function global_sum_dbl(array, dist, field_loc, mMask, lMask) & else maxiglob = -1 ! nothing to do for T-row on u-fold endif + endif - if (maxiglob > 0) then - - j = je + n = (iblock-1)*nx_block*ny_block + do j=jb,je + do i=ib,ie + n = n + 1 + ! eliminate redundant points + if (maxiglob > 0 .and. j == je .and. this_block%i_glob(i) > maxiglob) then + work(n,1) = 0._dbl_kind + else if (present(mMask)) then - do i=ib,ie - if (this_block%i_glob(i) > maxiglob) then - localSum = & - localSum - array(i,j,iblock)*mMask(i,j,iblock) - endif - end do + work(n,1) = array(i,j,iblock)*mMask(i,j,iblock) else if (present(lMask)) then - do i=ib,ie - if (this_block%i_glob(i) > maxiglob) then - if (lMask(i,j,iblock)) & - localSum = localSum - array(i,j,iblock) - endif - end do + if (lMask(i,j,iblock)) then + work(n,1) = array(i,j,iblock) + endif else - do i=ib,ie - if (this_block%i_glob(i) > maxiglob) then - localSum = localSum - array(i,j,iblock) - endif - end do + work(n,1) = array(i,j,iblock) endif - endif - endif - - !*** now add block sum to global sum + end do + end do + end do -#ifdef REPRODUCIBLE - globalSumTmp = globalSumTmp + localSum -#else - globalSum = globalSum + localSum -#endif + call compute_sums_dbl(work,sums,communicator,numProcs) - end do + globalSum = sums(1) -#ifdef REPRODUCIBLE - globalSum = globalSumTmp -#endif + deallocate(work) + deallocate(sums) !----------------------------------------------------------------------- @@ -249,8 +229,8 @@ function global_sum_real(array, dist, field_loc, mMask, lMask) & ! Computes the global sum of the physical domain of a 2-d array. ! ! This is actually the specific interface for the generic global_sum -! function corresponding to real arrays. The generic -! interface is identical but will handle real and integer 2-d slabs +! function corresponding to single precision arrays. The generic +! interface is identical but will handle double, real and integer 2-d slabs ! and real, integer, and double precision scalars. real (real_kind), dimension(:,:,:), intent(in) :: & @@ -277,25 +257,24 @@ function global_sum_real(array, dist, field_loc, mMask, lMask) & ! !----------------------------------------------------------------------- -#ifdef REPRODUCIBLE - real (dbl_kind) :: & - localSum, &! sum of local block domain - globalSumTmp ! higher precision global sum -#else - real (real_kind) :: & - localSum ! sum of local block domain -#endif - integer (int_kind) :: & - i,j,iblock, &! local counters + i,j,iblock,n, &! local counters ib,ie,jb,je, &! beg,end of physical domain blockID, &! block location + numProcs, &! number of processor participating numBlocks, &! number of local blocks + communicator, &! communicator for this distribution maxiglob ! maximum non-redundant value of i_global logical (log_kind) :: & Nrow ! this field is on a N row (a velocity row) + real (dbl_kind), dimension(:,:), allocatable :: & + work ! temporary local array + + real (dbl_kind), dimension(:), allocatable :: & + sums ! array of sums + type (block) :: & this_block ! holds local block information @@ -303,14 +282,20 @@ function global_sum_real(array, dist, field_loc, mMask, lMask) & !----------------------------------------------------------------------- -#ifdef REPRODUCIBLE - globalSumTmp = 0.0_dbl_kind -#else - globalSum = 0.0_real_kind -#endif call ice_distributionGet(dist, & - numLocalBlocks = numBlocks) + numLocalBlocks = numBlocks, & + nprocs = numProcs, & + communicator = communicator) + + if (numBlocks == 0) then + allocate(work(1,1)) + else + allocate(work(nx_block*ny_block*numBlocks,1)) + endif + allocate(sums(1)) + work = 0.0_dbl_kind + sums = 0.0_dbl_kind do iblock=1,numBlocks call ice_distributionGetBlockID(dist, iblock, blockID) @@ -322,42 +307,13 @@ function global_sum_real(array, dist, field_loc, mMask, lMask) & jb = this_block%jlo je = this_block%jhi -#ifdef REPRODUCIBLE - localSum = 0.0_dbl_kind -#else - localSum = 0.0_real_kind -#endif - - if (present(mMask)) then - do j=jb,je - do i=ib,ie - localSum = & - localSum + array(i,j,iblock)*mMask(i,j,iblock) - end do - end do - else if (present(lMask)) then - do j=jb,je - do i=ib,ie - if (lMask(i,j,iblock)) then - localSum = & - localSum + array(i,j,iblock) - endif - end do - end do - else - do j=jb,je - do i=ib,ie - localSum = localSum + array(i,j,iblock) - end do - end do - endif - !*** if this row along or beyond tripole boundary !*** must eliminate redundant points from global sum + maxiglob = -1 if (this_block%tripole) then Nrow=(field_loc == field_loc_Nface .or. & - field_loc == field_loc_NEcorner) + field_loc == field_loc_NEcorner) if (Nrow .and. this_block%tripoleTFlag) then maxiglob = 0 ! entire u-row on T-fold grid elseif (Nrow .or. this_block%tripoleTFlag) then @@ -365,49 +321,37 @@ function global_sum_real(array, dist, field_loc, mMask, lMask) & else maxiglob = -1 ! nothing to do for T-row on u-fold endif + endif - if (maxiglob > 0) then - - j = je + n = (iblock-1)*nx_block*ny_block + do j=jb,je + do i=ib,ie + n = n + 1 + ! eliminate redundant points + if (maxiglob > 0 .and. j == je .and. this_block%i_glob(i) > maxiglob) then + work(n,1) = 0._dbl_kind + else if (present(mMask)) then - do i=ib,ie - if (this_block%i_glob(i) > maxiglob) then - localSum = & - localSum - array(i,j,iblock)*mMask(i,j,iblock) - endif - end do + work(n,1) = real(array(i,j,iblock)*mMask(i,j,iblock),dbl_kind) else if (present(lMask)) then - do i=ib,ie - if (this_block%i_glob(i) > maxiglob) then - if (lMask(i,j,iblock)) & - localSum = localSum - array(i,j,iblock) - endif - end do + if (lMask(i,j,iblock)) then + work(n,1) = real(array(i,j,iblock),dbl_kind) + endif else - do i=ib,ie - if (this_block%i_glob(i) > maxiglob) then - localSum = localSum - array(i,j,iblock) - endif - end do + work(n,1) = real(array(i,j,iblock),dbl_kind) endif - endif - endif - - !*** now add block sum to global sum + end do + end do + end do -#ifdef REPRODUCIBLE - globalSumTmp = globalSumTmp + localSum -#else - globalSum = globalSum + localSum -#endif + call compute_sums_dbl(work,sums,communicator,numProcs) - end do + globalSum = real(sums(1),real_kind) -#ifdef REPRODUCIBLE - globalSum = globalSumTmp -#endif + deallocate(work) + deallocate(sums) !----------------------------------------------------------------------- @@ -450,13 +394,17 @@ function global_sum_int(array, dist, field_loc, mMask, lMask) & !----------------------------------------------------------------------- integer (int_kind) :: & + blockSum, &! sum of local block domain localSum ! sum of all local block domains integer (int_kind) :: & i,j,iblock, &! local counters ib,ie,jb,je, &! beg,end of physical domain + ierr, &! mpi error flag blockID, &! block location + numProcs, &! number of processor participating numBlocks, &! number of local blocks + communicator, &! communicator for this distribution maxiglob ! maximum non-redundant value of i_global logical (log_kind) :: & @@ -469,10 +417,13 @@ function global_sum_int(array, dist, field_loc, mMask, lMask) & !----------------------------------------------------------------------- + localSum = 0_int_kind globalSum = 0_int_kind call ice_distributionGet(dist, & - numLocalBlocks = numBlocks) + numLocalBlocks = numBlocks, & + nprocs = numProcs, & + communicator = communicator) do iblock=1,numBlocks call ice_distributionGetBlockID(dist, iblock, blockID) @@ -484,38 +435,13 @@ function global_sum_int(array, dist, field_loc, mMask, lMask) & jb = this_block%jlo je = this_block%jhi - localSum = 0 - - if (present(mMask)) then - do j=jb,je - do i=ib,ie - localSum = & - localSum + array(i,j,iblock)*mMask(i,j,iblock) - end do - end do - else if (present(lMask)) then - do j=jb,je - do i=ib,ie - if (lMask(i,j,iblock)) then - localSum = & - localSum + array(i,j,iblock) - endif - end do - end do - else - do j=jb,je - do i=ib,ie - localSum = localSum + array(i,j,iblock) - end do - end do - endif - !*** if this row along or beyond tripole boundary !*** must eliminate redundant points from global sum + maxiglob = -1 if (this_block%tripole) then Nrow=(field_loc == field_loc_Nface .or. & - field_loc == field_loc_NEcorner) + field_loc == field_loc_NEcorner) if (Nrow .and. this_block%tripoleTFlag) then maxiglob = 0 ! entire u-row on T-fold grid elseif (Nrow .or. this_block%tripoleTFlag) then @@ -523,42 +449,50 @@ function global_sum_int(array, dist, field_loc, mMask, lMask) & else maxiglob = -1 ! nothing to do for T-row on u-fold endif + endif - if (maxiglob > 0) then - - j = je + blockSum = 0_int_kind + do j=jb,je + do i=ib,ie + ! eliminate redundant points + if (maxiglob > 0 .and. j == je .and. this_block%i_glob(i) > maxiglob) then +! blockSum = blockSum + 0_int_kind + else if (present(mMask)) then - do i=ib,ie - if (this_block%i_glob(i) > maxiglob) then - localSum = & - localSum - array(i,j,iblock)*mMask(i,j,iblock) - endif - end do + blockSum = blockSum + array(i,j,iblock)*mMask(i,j,iblock) else if (present(lMask)) then - do i=ib,ie - if (this_block%i_glob(i) > maxiglob) then - if (lMask(i,j,iblock)) & - localSum = localSum - array(i,j,iblock) - endif - end do + if (lMask(i,j,iblock)) then + blockSum = blockSum + array(i,j,iblock) + endif else - do i=ib,ie - if (this_block%i_glob(i) > maxiglob) then - localSum = localSum - array(i,j,iblock) - endif - end do + blockSum = blockSum + array(i,j,iblock) endif - endif - endif + end do + end do !*** now add block sum to global sum - globalSum = globalSum + localSum + localSum = localSum + blockSum end do +!----------------------------------------------------------------------- +! +! now use MPI global reduction to reduce local sum to global sum +! +!----------------------------------------------------------------------- + +#ifdef SERIAL_REMOVE_MPI + globalSum = localSum +#else + if (my_task < numProcs) then + call MPI_ALLREDUCE(localSum, globalSum, 1, & + MPI_INTEGER, MPI_SUM, communicator, ierr) + endif +#endif + !----------------------------------------------------------------------- end function global_sum_int @@ -580,20 +514,54 @@ function global_sum_scalar_dbl(scalar, dist) & scalar ! scalar to be summed type (distrb), intent(in) :: & - dist ! block distribution + dist ! block distribution for array X real (dbl_kind) :: & globalSum ! resulting global sum +!----------------------------------------------------------------------- +! +! local variables +! +!----------------------------------------------------------------------- + + integer (int_kind) :: & + ierr, &! mpi error flag + numProcs, &! number of processor participating + numBlocks, &! number of local blocks + communicator ! communicator for this distribution + + real (dbl_kind), dimension(:,:), allocatable :: & + work ! temporary local array + + real (dbl_kind), dimension(:), allocatable :: & + sums ! array of sums + character(len=*), parameter :: subname = '(global_sum_scalar_dbl)' !----------------------------------------------------------------------- ! -! no operation needed for serial execution +! get communicator for MPI calls ! !----------------------------------------------------------------------- - globalSum = scalar + call ice_distributionGet(dist, & + numLocalBlocks = numBlocks, & + nprocs = numProcs, & + communicator = communicator) + + + allocate(work(1,1)) + allocate(sums(1)) + work(1,1) = scalar + sums = 0.0_dbl_kind + + call compute_sums_dbl(work,sums,communicator,numProcs) + + globalSum = sums(1) + + deallocate(work) + deallocate(sums) !----------------------------------------------------------------------- @@ -616,20 +584,53 @@ function global_sum_scalar_real(scalar, dist) & scalar ! scalar to be summed type (distrb), intent(in) :: & - dist ! block distribution + dist ! block distribution for array X real (real_kind) :: & globalSum ! resulting global sum +!----------------------------------------------------------------------- +! +! local variables +! +!----------------------------------------------------------------------- + + integer (int_kind) :: & + ierr, &! mpi error flag + numProcs, &! number of processor participating + numBlocks, &! number of local blocks + communicator ! communicator for this distribution + + real (dbl_kind), dimension(:,:), allocatable :: & + work ! temporary local array + + real (dbl_kind), dimension(:), allocatable :: & + sums ! array of sums + character(len=*), parameter :: subname = '(global_sum_scalar_real)' !----------------------------------------------------------------------- ! -! no operation needed for serial execution +! get communicator for MPI calls ! !----------------------------------------------------------------------- - globalSum = scalar + call ice_distributionGet(dist, & + numLocalBlocks = numBlocks, & + nprocs = numProcs, & + communicator = communicator) + + allocate(work(1,1)) + allocate(sums(1)) + work(1,1) = real(scalar,dbl_kind) + sums = 0.0_dbl_kind + + call compute_sums_dbl(work,sums,communicator,numProcs) + + globalSum = real(sums(1),real_kind) + + deallocate(work) + deallocate(sums) !----------------------------------------------------------------------- @@ -652,20 +653,50 @@ function global_sum_scalar_int(scalar, dist) & scalar ! scalar to be summed type (distrb), intent(in) :: & - dist ! block distribution + dist ! block distribution for array X integer (int_kind) :: & globalSum ! resulting global sum +!----------------------------------------------------------------------- +! +! local variables +! +!----------------------------------------------------------------------- + + integer (int_kind) :: & + ierr, &! mpi error flag + numProcs, &! number of processor participating + numBlocks, &! number of local blocks + communicator ! communicator for this distribution + character(len=*), parameter :: subname = '(global_sum_scalar_int)' !----------------------------------------------------------------------- ! -! no operation needed for serial execution +! get communicator for MPI calls ! !----------------------------------------------------------------------- + call ice_distributionGet(dist, & + numLocalBlocks = numBlocks, & + nprocs = numProcs, & + communicator = communicator) + +!----------------------------------------------------------------------- +! +! now use MPI global reduction to reduce local sum to global sum +! +!----------------------------------------------------------------------- + +#ifdef SERIAL_REMOVE_MPI globalSum = scalar +#else + if (my_task < numProcs) then + call MPI_ALLREDUCE(scalar, globalSum, 1, & + MPI_INTEGER, MPI_SUM, communicator, ierr) + endif +#endif !----------------------------------------------------------------------- @@ -709,40 +740,45 @@ function global_sum_prod_dbl (array1, array2, dist, field_loc, & ! !----------------------------------------------------------------------- -#ifdef REPRODUCIBLE - real (r16_kind) :: & - localSum, &! sum of local block domain - globalSumTmp ! higher precision for reproducibility -#else - real (dbl_kind) :: & - localSum ! sum of local block domain -#endif - integer (int_kind) :: & - i,j,iblock, &! local counters - ib,ie,jb,je, &! beg,end of physical domain - blockID, &! block location - numBlocks, &! number of local blocks - maxiglob ! maximum non-redundant value of i_global + i,j,iblock,n, &! local counters + ib,ie,jb,je, &! beg,end of physical domain + blockID, &! block location + numProcs, &! number of processor participating + numBlocks, &! number of local blocks + communicator, &! communicator for this distribution + maxiglob ! maximum non-redundant value of i_global logical (log_kind) :: & Nrow ! this field is on a N row (a velocity row) + real (dbl_kind), dimension(:,:), allocatable :: & + work ! temporary local array + + real (dbl_kind), dimension(:), allocatable :: & + sums ! array of sums + type (block) :: & - this_block ! holds local block information + this_block ! holds local block information character(len=*), parameter :: subname = '(global_sum_prod_dbl)' !----------------------------------------------------------------------- -#ifdef REPRODUCIBLE - globalSum = 0.0_r16_kind -#else - globalSum = 0.0_dbl_kind -#endif - call ice_distributionGet(dist, & - numLocalBlocks = numBlocks) + call ice_distributionGet(dist, & + numLocalBlocks = numBlocks, & + nprocs = numProcs, & + communicator = communicator) + + if (numBlocks == 0) then + allocate(work(1,1)) + else + allocate(work(nx_block*ny_block*numBlocks,1)) + endif + allocate(sums(1)) + work = 0.0_dbl_kind + sums = 0.0_dbl_kind do iblock=1,numBlocks call ice_distributionGetBlockID(dist, iblock, blockID) @@ -754,43 +790,13 @@ function global_sum_prod_dbl (array1, array2, dist, field_loc, & jb = this_block%jlo je = this_block%jhi -#ifdef REPRODUCIBLE - localSum = 0.0_r16_kind -#else - localSum = 0.0_dbl_kind -#endif - - if (present(mMask)) then - do j=jb,je - do i=ib,ie - localSum = & - localSum + array1(i,j,iblock)*array2(i,j,iblock)* & - mMask(i,j,iblock) - end do - end do - else if (present(lMask)) then - do j=jb,je - do i=ib,ie - if (lMask(i,j,iblock)) then - localSum = & - localSum + array1(i,j,iblock)*array2(i,j,iblock) - endif - end do - end do - else - do j=jb,je - do i=ib,ie - localSum = localSum + array1(i,j,iblock)*array2(i,j,iblock) - end do - end do - endif - !*** if this row along or beyond tripole boundary !*** must eliminate redundant points from global sum + maxiglob = -1 if (this_block%tripole) then Nrow=(field_loc == field_loc_Nface .or. & - field_loc == field_loc_NEcorner) + field_loc == field_loc_NEcorner) if (Nrow .and. this_block%tripoleTFlag) then maxiglob = 0 ! entire u-row on T-fold grid elseif (Nrow .or. this_block%tripoleTFlag) then @@ -798,52 +804,37 @@ function global_sum_prod_dbl (array1, array2, dist, field_loc, & else maxiglob = -1 ! nothing to do for T-row on u-fold endif + endif - if (maxiglob > 0) then - - j = je + n = (iblock-1)*nx_block*ny_block + do j=jb,je + do i=ib,ie + n = n + 1 + ! eliminate redundant points + if (maxiglob > 0 .and. j == je .and. this_block%i_glob(i) > maxiglob) then + work(n,1) = 0._dbl_kind + else if (present(mMask)) then - do i=ib,ie - if (this_block%i_glob(i) > maxiglob) then - localSum = & - localSum - array1(i,j,iblock)*array2(i,j,iblock)* & - mMask(i,j,iblock) - endif - end do + work(n,1) = array1(i,j,iblock)*array2(i,j,iblock)*mMask(i,j,iblock) else if (present(lMask)) then - do i=ib,ie - if (this_block%i_glob(i) > maxiglob) then - if (lMask(i,j,iblock)) & - localSum = localSum - & - array1(i,j,iblock)*array2(i,j,iblock) - endif - end do + if (lMask(i,j,iblock)) then + work(n,1) = array1(i,j,iblock)*array2(i,j,iblock) + endif else - do i=ib,ie - if (this_block%i_glob(i) > maxiglob) then - localSum = localSum - & - array1(i,j,iblock)*array2(i,j,iblock) - endif - end do + work(n,1) = array1(i,j,iblock)*array2(i,j,iblock) endif - endif - endif - - !*** now add block sum to global sum + end do + end do + end do -#ifdef REPRODUCIBLE - globalSumTmp = globalSumTmp + localSum -#else - globalSum = globalSum + localSum -#endif + call compute_sums_dbl(work,sums,communicator,numProcs) - end do + globalSum = sums(1) -#ifdef REPRODUCIBLE - globalSum = globalSumTmp -#endif + deallocate(work) + deallocate(sums) !----------------------------------------------------------------------- @@ -887,40 +878,45 @@ function global_sum_prod_real (array1, array2, dist, field_loc, & ! !----------------------------------------------------------------------- -#ifdef REPRODUCIBLE - real (dbl_kind) :: & - localSum, &! sum of local block domain - globalSumTmp ! higher precision for reproducibility -#else - real (real_kind) :: & - localSum ! sum of local block domain -#endif - integer (int_kind) :: & - i,j,iblock, &! local counters - ib,ie,jb,je, &! beg,end of physical domain - blockID, &! block location - numBlocks, &! number of local blocks - maxiglob ! maximum non-redundant value of i_global + i,j,iblock,n, &! local counters + ib,ie,jb,je, &! beg,end of physical domain + blockID, &! block location + numProcs, &! number of processor participating + numBlocks, &! number of local blocks + communicator, &! communicator for this distribution + maxiglob ! maximum non-redundant value of i_global logical (log_kind) :: & Nrow ! this field is on a N row (a velocity row) + real (dbl_kind), dimension(:,:), allocatable :: & + work ! temporary local array + + real (dbl_kind), dimension(:), allocatable :: & + sums ! array of sums + type (block) :: & - this_block ! holds local block information + this_block ! holds local block information - character(len=*), parameter :: subname = '(global_sum_prod_real)' + character(len=*), parameter :: subname = '(global_sum_prod_dbl)' !----------------------------------------------------------------------- -#ifdef REPRODUCIBLE - globalSumTmp = 0.0_dbl_kind -#else - globalSum = 0.0_real_kind -#endif - call ice_distributionGet(dist, & - numLocalBlocks = numBlocks) + call ice_distributionGet(dist, & + numLocalBlocks = numBlocks, & + nprocs = numProcs, & + communicator = communicator) + + if (numBlocks == 0) then + allocate(work(1,1)) + else + allocate(work(nx_block*ny_block*numBlocks,1)) + endif + allocate(sums(1)) + work = 0.0_dbl_kind + sums = 0.0_dbl_kind do iblock=1,numBlocks call ice_distributionGetBlockID(dist, iblock, blockID) @@ -932,43 +928,13 @@ function global_sum_prod_real (array1, array2, dist, field_loc, & jb = this_block%jlo je = this_block%jhi -#ifdef REPRODUCIBLE - localSum = 0.0_dbl_kind -#else - localSum = 0.0_real_kind -#endif - - if (present(mMask)) then - do j=jb,je - do i=ib,ie - localSum = & - localSum + array1(i,j,iblock)*array2(i,j,iblock)* & - mMask(i,j,iblock) - end do - end do - else if (present(lMask)) then - do j=jb,je - do i=ib,ie - if (lMask(i,j,iblock)) then - localSum = & - localSum + array1(i,j,iblock)*array2(i,j,iblock) - endif - end do - end do - else - do j=jb,je - do i=ib,ie - localSum = localSum + array1(i,j,iblock)*array2(i,j,iblock) - end do - end do - endif - !*** if this row along or beyond tripole boundary !*** must eliminate redundant points from global sum + maxiglob = -1 if (this_block%tripole) then Nrow=(field_loc == field_loc_Nface .or. & - field_loc == field_loc_NEcorner) + field_loc == field_loc_NEcorner) if (Nrow .and. this_block%tripoleTFlag) then maxiglob = 0 ! entire u-row on T-fold grid elseif (Nrow .or. this_block%tripoleTFlag) then @@ -976,52 +942,37 @@ function global_sum_prod_real (array1, array2, dist, field_loc, & else maxiglob = -1 ! nothing to do for T-row on u-fold endif + endif - if (maxiglob > 0) then - - j = je + n = (iblock-1)*nx_block*ny_block + do j=jb,je + do i=ib,ie + n = n + 1 + ! eliminate redundant points + if (maxiglob > 0 .and. j == je .and. this_block%i_glob(i) > maxiglob) then + work(n,1) = 0._dbl_kind + else if (present(mMask)) then - do i=ib,ie - if (this_block%i_glob(i) > maxiglob) then - localSum = & - localSum - array1(i,j,iblock)*array2(i,j,iblock)* & - mMask(i,j,iblock) - endif - end do + work(n,1) = real(array1(i,j,iblock)*array2(i,j,iblock)*mMask(i,j,iblock),dbl_kind) else if (present(lMask)) then - do i=ib,ie - if (this_block%i_glob(i) > maxiglob) then - if (lMask(i,j,iblock)) & - localSum = localSum - & - array1(i,j,iblock)*array2(i,j,iblock) - endif - end do + if (lMask(i,j,iblock)) then + work(n,1) = real(array1(i,j,iblock)*array2(i,j,iblock),dbl_kind) + endif else - do i=ib,ie - if (this_block%i_glob(i) > maxiglob) then - localSum = localSum - & - array1(i,j,iblock)*array2(i,j,iblock) - endif - end do + work(n,1) = real(array1(i,j,iblock)*array2(i,j,iblock),dbl_kind) endif - endif - endif + end do + end do + end do - !*** now add block sum to global sum + call compute_sums_dbl(work,sums,communicator,numProcs) -#ifdef REPRODUCIBLE - globalSumTmp = globalSumTmp + localSum -#else - globalSum = globalSum + localSum -#endif + globalSum = real(sums(1),real_kind) - end do - -#ifdef REPRODUCIBLE - globalSum = globalSumTmp -#endif + deallocate(work) + deallocate(sums) !----------------------------------------------------------------------- @@ -1066,13 +1017,17 @@ function global_sum_prod_int (array1, array2, dist, field_loc, & !----------------------------------------------------------------------- integer (int_kind) :: & - localSum ! sum of local block domain + blockSum, &! sum of local block domain + localSum ! sum of all local block domains integer (int_kind) :: & i,j,iblock, &! local counters ib,ie,jb,je, &! beg,end of physical domain + ierr, &! mpi error flag blockID, &! block location numBlocks, &! number of local blocks + numProcs, &! number of processor participating + communicator, &! communicator for this distribution maxiglob ! maximum non-redundant value of i_global logical (log_kind) :: & @@ -1085,10 +1040,13 @@ function global_sum_prod_int (array1, array2, dist, field_loc, & !----------------------------------------------------------------------- + localSum = 0_int_kind globalSum = 0_int_kind call ice_distributionGet(dist, & - numLocalBlocks = numBlocks) + numLocalBlocks = numBlocks, & + nprocs = numProcs, & + communicator = communicator) do iblock=1,numBlocks call ice_distributionGetBlockID(dist, iblock, blockID) @@ -1100,39 +1058,13 @@ function global_sum_prod_int (array1, array2, dist, field_loc, & jb = this_block%jlo je = this_block%jhi - localSum = 0 - - if (present(mMask)) then - do j=jb,je - do i=ib,ie - localSum = & - localSum + array1(i,j,iblock)*array2(i,j,iblock)* & - mMask(i,j,iblock) - end do - end do - else if (present(lMask)) then - do j=jb,je - do i=ib,ie - if (lMask(i,j,iblock)) then - localSum = & - localSum + array1(i,j,iblock)*array2(i,j,iblock) - endif - end do - end do - else - do j=jb,je - do i=ib,ie - localSum = localSum + array1(i,j,iblock)*array2(i,j,iblock) - end do - end do - endif - !*** if this row along or beyond tripole boundary !*** must eliminate redundant points from global sum + maxiglob = -1 if (this_block%tripole) then Nrow=(field_loc == field_loc_Nface .or. & - field_loc == field_loc_NEcorner) + field_loc == field_loc_NEcorner) if (Nrow .and. this_block%tripoleTFlag) then maxiglob = 0 ! entire u-row on T-fold grid elseif (Nrow .or. this_block%tripoleTFlag) then @@ -1140,45 +1072,50 @@ function global_sum_prod_int (array1, array2, dist, field_loc, & else maxiglob = -1 ! nothing to do for T-row on u-fold endif + endif - if (maxiglob > 0) then - - j = je + blockSum = 0_int_kind + do j=jb,je + do i=ib,ie + ! eliminate redundant points + if (maxiglob > 0 .and. j == je .and. this_block%i_glob(i) > maxiglob) then +! blockSum = blockSum + 0_int_kind + else if (present(mMask)) then - do i=ib,ie - if (this_block%i_glob(i) > maxiglob) then - localSum = & - localSum - array1(i,j,iblock)*array2(i,j,iblock)* & - mMask(i,j,iblock) - endif - end do + blockSum = blockSum + array1(i,j,iblock)*array2(i,j,iblock)*mMask(i,j,iblock) else if (present(lMask)) then - do i=ib,ie - if (this_block%i_glob(i) > maxiglob) then - if (lMask(i,j,iblock)) & - localSum = localSum - & - array1(i,j,iblock)*array2(i,j,iblock) - endif - end do + if (lMask(i,j,iblock)) then + blockSum = blockSum + array1(i,j,iblock)*array2(i,j,iblock) + endif else - do i=ib,ie - if (this_block%i_glob(i) > maxiglob) then - localSum = localSum - & - array1(i,j,iblock)*array2(i,j,iblock) - endif - end do + blockSum = blockSum + array1(i,j,iblock)*array2(i,j,iblock) endif - endif - endif + end do + end do !*** now add block sum to global sum - globalSum = globalSum + localSum + localSum = localSum + blockSum end do +!----------------------------------------------------------------------- +! +! now use MPI global reduction to reduce local sum to global sum +! +!----------------------------------------------------------------------- + +#ifdef SERIAL_REMOVE_MPI + globalSum = localSum +#else + if (my_task < numProcs) then + call MPI_ALLREDUCE(localSum, globalSum, 1, & + MPI_INTEGER, MPI_SUM, communicator, ierr) + endif +#endif + !----------------------------------------------------------------------- end function global_sum_prod_int @@ -1212,12 +1149,16 @@ function global_maxval_dbl (array, dist, lMask) & !----------------------------------------------------------------------- real (dbl_kind) :: & - localMaxval ! sum of local block domain + blockMaxval, &! sum of local block domain + localMaxval ! sum of all local block domains integer (int_kind) :: & i,j,iblock, &! local counters ib,ie,jb,je, &! beg,end of physical domain + ierr, &! mpi error flag numBlocks, &! number of local blocks + numProcs, &! number of processor participating + communicator, &! communicator for this distribution blockID ! block location type (block) :: & @@ -1227,10 +1168,13 @@ function global_maxval_dbl (array, dist, lMask) & !----------------------------------------------------------------------- + localMaxval = -HUGE(0.0_dbl_kind) globalMaxval = -HUGE(0.0_dbl_kind) call ice_distributionGet(dist, & - numLocalBlocks = numBlocks) + numLocalBlocks = numBlocks, & + nprocs = numProcs, & + communicator = communicator) do iblock=1,numBlocks call ice_distributionGetBlockID(dist, iblock, blockID) @@ -1242,28 +1186,43 @@ function global_maxval_dbl (array, dist, lMask) & jb = this_block%jlo je = this_block%jhi - localMaxval = -HUGE(0.0_dbl_kind) + blockMaxval = -HUGE(0.0_dbl_kind) if (present(lMask)) then do j=jb,je do i=ib,ie if (lMask(i,j,iblock)) then - localMaxval = max(localMaxval,array(i,j,iblock)) + blockMaxval = max(blockMaxval,array(i,j,iblock)) endif end do end do else do j=jb,je do i=ib,ie - localMaxval = max(localMaxval,array(i,j,iblock)) + blockMaxval = max(blockMaxval,array(i,j,iblock)) end do end do endif - globalMaxval = max(globalMaxval,localMaxval) + localMaxval = max(localMaxval,blockMaxval) end do +!----------------------------------------------------------------------- +! +! now use MPI global reduction to reduce local maxval to global maxval +! +!----------------------------------------------------------------------- + +#ifdef SERIAL_REMOVE_MPI + globalMaxval = localMaxval +#else + if (my_task < numProcs) then + call MPI_ALLREDUCE(localMaxval, globalMaxval, 1, & + mpiR8, MPI_MAX, communicator, ierr) + endif +#endif + !----------------------------------------------------------------------- end function global_maxval_dbl @@ -1297,12 +1256,16 @@ function global_maxval_real (array, dist, lMask) & !----------------------------------------------------------------------- real (real_kind) :: & - localMaxval ! sum of local block domain + blockMaxval, &! sum of local block domain + localMaxval ! sum of all local block domains integer (int_kind) :: & i,j,iblock, &! local counters ib,ie,jb,je, &! beg,end of physical domain + ierr, &! mpi error flag numBlocks, &! number of local blocks + numProcs, &! number of processor participating + communicator, &! communicator for this distribution blockID ! block location type (block) :: & @@ -1312,10 +1275,13 @@ function global_maxval_real (array, dist, lMask) & !----------------------------------------------------------------------- + localMaxval = -HUGE(0.0_real_kind) globalMaxval = -HUGE(0.0_real_kind) call ice_distributionGet(dist, & - numLocalBlocks = numBlocks) + numLocalBlocks = numBlocks, & + nprocs = numProcs, & + communicator = communicator) do iblock=1,numBlocks call ice_distributionGetBlockID(dist, iblock, blockID) @@ -1327,28 +1293,43 @@ function global_maxval_real (array, dist, lMask) & jb = this_block%jlo je = this_block%jhi - localMaxval = -HUGE(0.0_real_kind) + blockMaxval = -HUGE(0.0_real_kind) if (present(lMask)) then do j=jb,je do i=ib,ie if (lMask(i,j,iblock)) then - localMaxval = max(localMaxval,array(i,j,iblock)) + blockMaxval = max(blockMaxval,array(i,j,iblock)) endif end do end do else do j=jb,je do i=ib,ie - localMaxval = max(localMaxval,array(i,j,iblock)) + blockMaxval = max(blockMaxval,array(i,j,iblock)) end do end do endif - globalMaxval = max(globalMaxval,localMaxval) + localMaxval = max(localMaxval,blockMaxval) end do +!----------------------------------------------------------------------- +! +! now use MPI global reduction to reduce local maxval to global maxval +! +!----------------------------------------------------------------------- + +#ifdef SERIAL_REMOVE_MPI + globalMaxval = localMaxval +#else + if (my_task < numProcs) then + call MPI_ALLREDUCE(localMaxval, globalMaxval, 1, & + mpiR4, MPI_MAX, communicator, ierr) + endif +#endif + !----------------------------------------------------------------------- end function global_maxval_real @@ -1382,12 +1363,16 @@ function global_maxval_int (array, dist, lMask) & !----------------------------------------------------------------------- integer (int_kind) :: & - localMaxval ! sum of local block domain + blockMaxval, &! sum of local block domain + localMaxval ! sum of all local block domains integer (int_kind) :: & i,j,iblock, &! local counters ib,ie,jb,je, &! beg,end of physical domain + ierr, &! mpi error flag numBlocks, &! number of local blocks + numProcs, &! number of processor participating + communicator, &! communicator for this distribution blockID ! block location type (block) :: & @@ -1397,10 +1382,13 @@ function global_maxval_int (array, dist, lMask) & !----------------------------------------------------------------------- + localMaxval = -HUGE(0_int_kind) globalMaxval = -HUGE(0_int_kind) call ice_distributionGet(dist, & - numLocalBlocks = numBlocks) + numLocalBlocks = numBlocks, & + nprocs = numProcs, & + communicator = communicator) do iblock=1,numBlocks call ice_distributionGetBlockID(dist, iblock, blockID) @@ -1412,28 +1400,43 @@ function global_maxval_int (array, dist, lMask) & jb = this_block%jlo je = this_block%jhi - localMaxval = -HUGE(0_int_kind) + blockMaxval = -HUGE(0_int_kind) if (present(lMask)) then do j=jb,je do i=ib,ie if (lMask(i,j,iblock)) then - localMaxval = max(localMaxval,array(i,j,iblock)) + blockMaxval = max(blockMaxval,array(i,j,iblock)) endif end do end do else do j=jb,je do i=ib,ie - localMaxval = max(localMaxval,array(i,j,iblock)) + blockMaxval = max(blockMaxval,array(i,j,iblock)) end do end do endif - globalMaxval = max(globalMaxval,localMaxval) + localMaxval = max(localMaxval,blockMaxval) end do +!----------------------------------------------------------------------- +! +! now use MPI global reduction to reduce local maxval to global maxval +! +!----------------------------------------------------------------------- + +#ifdef SERIAL_REMOVE_MPI + globalMaxval = localMaxval +#else + if (my_task < numProcs) then + call MPI_ALLREDUCE(localMaxval, globalMaxval, 1, & + MPI_INTEGER, MPI_MAX, communicator, ierr) + endif +#endif + !----------------------------------------------------------------------- end function global_maxval_int @@ -1453,20 +1456,44 @@ function global_maxval_scalar_dbl (scalar, dist) & scalar ! scalar for which max value needed type (distrb), intent(in) :: & - dist ! block distribution + dist ! block distribution real (dbl_kind) :: & globalMaxval ! resulting maximum value +!----------------------------------------------------------------------- +! +! local variables +! +!----------------------------------------------------------------------- + + integer (int_kind) :: & + ierr, &! mpi error flag + numProcs, &! number of processor participating + communicator ! communicator for this distribution + character(len=*), parameter :: subname = '(global_maxval_scalar_dbl)' +!----------------------------------------------------------------------- + + call ice_distributionGet(dist, & + nprocs = numProcs, & + communicator = communicator) + !----------------------------------------------------------------------- ! -! no operations required for serial execution +! now use MPI global reduction to reduce local maxval to global maxval ! !----------------------------------------------------------------------- +#ifdef SERIAL_REMOVE_MPI globalMaxval = scalar +#else + if (my_task < numProcs) then + call MPI_ALLREDUCE(scalar, globalMaxval, 1, & + mpiR8, MPI_MAX, communicator, ierr) + endif +#endif !----------------------------------------------------------------------- @@ -1487,20 +1514,44 @@ function global_maxval_scalar_real (scalar, dist) & scalar ! scalar for which max value needed type (distrb), intent(in) :: & - dist ! block distribution + dist ! block distribution real (real_kind) :: & globalMaxval ! resulting maximum value +!----------------------------------------------------------------------- +! +! local variables +! +!----------------------------------------------------------------------- + + integer (int_kind) :: & + ierr, &! mpi error flag + numProcs, &! number of processor participating + communicator ! communicator for this distribution + character(len=*), parameter :: subname = '(global_maxval_scalar_real)' +!----------------------------------------------------------------------- + + call ice_distributionGet(dist, & + nprocs = numProcs, & + communicator = communicator) + !----------------------------------------------------------------------- ! -! no operations required for serial execution +! now use MPI global reduction to reduce local maxval to global maxval ! !----------------------------------------------------------------------- +#ifdef SERIAL_REMOVE_MPI globalMaxval = scalar +#else + if (my_task < numProcs) then + call MPI_ALLREDUCE(scalar, globalMaxval, 1, & + mpiR4, MPI_MAX, communicator, ierr) + endif +#endif !----------------------------------------------------------------------- @@ -1521,20 +1572,44 @@ function global_maxval_scalar_int (scalar, dist) & scalar ! scalar for which max value needed type (distrb), intent(in) :: & - dist ! block distribution + dist ! block distribution integer (int_kind) :: & globalMaxval ! resulting maximum value +!----------------------------------------------------------------------- +! +! local variables +! +!----------------------------------------------------------------------- + + integer (int_kind) :: & + ierr, &! mpi error flag + numProcs, &! number of processor participating + communicator ! communicator for this distribution + character(len=*), parameter :: subname = '(global_maxval_scalar_int)' +!----------------------------------------------------------------------- + + call ice_distributionGet(dist, & + nprocs = numProcs, & + communicator = communicator) + !----------------------------------------------------------------------- ! -! no operations required for serial execution +! now use MPI global reduction to reduce local maxval to global maxval ! !----------------------------------------------------------------------- +#ifdef SERIAL_REMOVE_MPI globalMaxval = scalar +#else + if (my_task < numProcs) then + call MPI_ALLREDUCE(scalar, globalMaxval, 1, & + MPI_INTEGER, MPI_MAX, communicator, ierr) + endif +#endif !----------------------------------------------------------------------- @@ -1569,12 +1644,16 @@ function global_minval_dbl (array, dist, lMask) & !----------------------------------------------------------------------- real (dbl_kind) :: & - localMinval ! sum of local block domain + blockMinval, &! sum of local block domain + localMinval ! sum of all local block domains integer (int_kind) :: & i,j,iblock, &! local counters ib,ie,jb,je, &! beg,end of physical domain + ierr, &! mpi error flag numBlocks, &! number of local blocks + numProcs, &! number of processor participating + communicator, &! communicator for this distribution blockID ! block location type (block) :: & @@ -1584,10 +1663,13 @@ function global_minval_dbl (array, dist, lMask) & !----------------------------------------------------------------------- + localMinval = HUGE(0.0_dbl_kind) globalMinval = HUGE(0.0_dbl_kind) call ice_distributionGet(dist, & - numLocalBlocks = numBlocks) + numLocalBlocks = numBlocks, & + nprocs = numProcs, & + communicator = communicator) do iblock=1,numBlocks call ice_distributionGetBlockID(dist, iblock, blockID) @@ -1599,28 +1681,43 @@ function global_minval_dbl (array, dist, lMask) & jb = this_block%jlo je = this_block%jhi - localMinval = HUGE(0.0_dbl_kind) + blockMinval = HUGE(0.0_dbl_kind) if (present(lMask)) then do j=jb,je do i=ib,ie if (lMask(i,j,iblock)) then - localMinval = min(localMinval,array(i,j,iblock)) + blockMinval = min(blockMinval,array(i,j,iblock)) endif end do end do else do j=jb,je do i=ib,ie - localMinval = min(localMinval,array(i,j,iblock)) + blockMinval = min(blockMinval,array(i,j,iblock)) end do end do endif - globalMinval = min(globalMinval,localMinval) + localMinval = min(localMinval,blockMinval) end do +!----------------------------------------------------------------------- +! +! now use MPI global reduction to reduce local minval to global minval +! +!----------------------------------------------------------------------- + +#ifdef SERIAL_REMOVE_MPI + globalMinval = localMinval +#else + if (my_task < numProcs) then + call MPI_ALLREDUCE(localMinval, globalMinval, 1, & + mpiR8, MPI_MIN, communicator, ierr) + endif +#endif + !----------------------------------------------------------------------- end function global_minval_dbl @@ -1654,12 +1751,16 @@ function global_minval_real (array, dist, lMask) & !----------------------------------------------------------------------- real (real_kind) :: & - localMinval ! sum of local block domain + blockMinval, &! sum of local block domain + localMinval ! sum of all local block domains integer (int_kind) :: & i,j,iblock, &! local counters ib,ie,jb,je, &! beg,end of physical domain + ierr, &! mpi error flag numBlocks, &! number of local blocks + numProcs, &! number of processor participating + communicator, &! communicator for this distribution blockID ! block location type (block) :: & @@ -1669,10 +1770,13 @@ function global_minval_real (array, dist, lMask) & !----------------------------------------------------------------------- + localMinval = HUGE(0.0_real_kind) globalMinval = HUGE(0.0_real_kind) call ice_distributionGet(dist, & - numLocalBlocks = numBlocks) + numLocalBlocks = numBlocks, & + nprocs = numProcs, & + communicator = communicator) do iblock=1,numBlocks call ice_distributionGetBlockID(dist, iblock, blockID) @@ -1684,28 +1788,43 @@ function global_minval_real (array, dist, lMask) & jb = this_block%jlo je = this_block%jhi - localMinval = HUGE(0.0_real_kind) + blockMinval = HUGE(0.0_real_kind) if (present(lMask)) then do j=jb,je do i=ib,ie if (lMask(i,j,iblock)) then - localMinval = min(localMinval,array(i,j,iblock)) + blockMinval = min(blockMinval,array(i,j,iblock)) endif end do end do else do j=jb,je do i=ib,ie - localMinval = min(localMinval,array(i,j,iblock)) + blockMinval = min(blockMinval,array(i,j,iblock)) end do end do endif - globalMinval = min(globalMinval,localMinval) + localMinval = min(localMinval,blockMinval) end do +!----------------------------------------------------------------------- +! +! now use MPI global reduction to reduce local minval to global minval +! +!----------------------------------------------------------------------- + +#ifdef SERIAL_REMOVE_MPI + globalMinval = localMinval +#else + if (my_task < numProcs) then + call MPI_ALLREDUCE(localMinval, globalMinval, 1, & + mpiR4, MPI_MIN, communicator, ierr) + endif +#endif + !----------------------------------------------------------------------- end function global_minval_real @@ -1739,12 +1858,16 @@ function global_minval_int (array, dist, lMask) & !----------------------------------------------------------------------- integer (int_kind) :: & - localMinval ! sum of local block domain + blockMinval, &! sum of local block domain + localMinval ! sum of all local block domains integer (int_kind) :: & i,j,iblock, &! local counters ib,ie,jb,je, &! beg,end of physical domain + ierr, &! mpi error flag numBlocks, &! number of local blocks + numProcs, &! number of processor participating + communicator, &! communicator for this distribution blockID ! block location type (block) :: & @@ -1754,10 +1877,13 @@ function global_minval_int (array, dist, lMask) & !----------------------------------------------------------------------- + localMinval = HUGE(0_int_kind) globalMinval = HUGE(0_int_kind) call ice_distributionGet(dist, & - numLocalBlocks = numBlocks) + numLocalBlocks = numBlocks, & + nprocs = numProcs, & + communicator = communicator) do iblock=1,numBlocks call ice_distributionGetBlockID(dist, iblock, blockID) @@ -1769,28 +1895,43 @@ function global_minval_int (array, dist, lMask) & jb = this_block%jlo je = this_block%jhi - localMinval = HUGE(0_int_kind) + blockMinval = HUGE(0_int_kind) if (present(lMask)) then do j=jb,je do i=ib,ie if (lMask(i,j,iblock)) then - localMinval = min(localMinval,array(i,j,iblock)) + blockMinval = min(blockMinval,array(i,j,iblock)) endif end do end do else do j=jb,je do i=ib,ie - localMinval = min(localMinval,array(i,j,iblock)) + blockMinval = min(blockMinval,array(i,j,iblock)) end do end do endif - globalMinval = min(globalMinval,localMinval) + localMinval = min(localMinval,blockMinval) end do +!----------------------------------------------------------------------- +! +! now use MPI global reduction to reduce local minval to global minval +! +!----------------------------------------------------------------------- + +#ifdef SERIAL_REMOVE_MPI + globalMinval = localMinval +#else + if (my_task < numProcs) then + call MPI_ALLREDUCE(localMinval, globalMinval, 1, & + MPI_INTEGER, MPI_MIN, communicator, ierr) + endif +#endif + !----------------------------------------------------------------------- end function global_minval_int @@ -1810,20 +1951,44 @@ function global_minval_scalar_dbl (scalar, dist) & scalar ! scalar for which min value needed type (distrb), intent(in) :: & - dist ! block distribution + dist ! block distribution real (dbl_kind) :: & globalMinval ! resulting minimum value +!----------------------------------------------------------------------- +! +! local variables +! +!----------------------------------------------------------------------- + + integer (int_kind) :: & + ierr, &! mpi error flag + numProcs, &! number of processor participating + communicator ! communicator for this distribution + character(len=*), parameter :: subname = '(global_minval_scalar_dbl)' +!----------------------------------------------------------------------- + + call ice_distributionGet(dist, & + nprocs = numProcs, & + communicator = communicator) + !----------------------------------------------------------------------- ! -! no operations required for serial execution +! now use MPI global reduction to reduce local minval to global minval ! !----------------------------------------------------------------------- +#ifdef SERIAL_REMOVE_MPI globalMinval = scalar +#else + if (my_task < numProcs) then + call MPI_ALLREDUCE(scalar, globalMinval, 1, & + mpiR8, MPI_MIN, communicator, ierr) + endif +#endif !----------------------------------------------------------------------- @@ -1844,20 +2009,44 @@ function global_minval_scalar_real (scalar, dist) & scalar ! scalar for which min value needed type (distrb), intent(in) :: & - dist ! block distribution + dist ! block distribution real (real_kind) :: & globalMinval ! resulting minimum value +!----------------------------------------------------------------------- +! +! local variables +! +!----------------------------------------------------------------------- + + integer (int_kind) :: & + ierr, &! mpi error flag + numProcs, &! number of processor participating + communicator ! communicator for this distribution + character(len=*), parameter :: subname = '(global_minval_scalar_real)' +!----------------------------------------------------------------------- + + call ice_distributionGet(dist, & + nprocs = numProcs, & + communicator = communicator) + !----------------------------------------------------------------------- ! -! no operations required for serial execution +! now use MPI global reduction to reduce local minval to global minval ! !----------------------------------------------------------------------- +#ifdef SERIAL_REMOVE_MPI globalMinval = scalar +#else + if (my_task < numProcs) then + call MPI_ALLREDUCE(scalar, globalMinval, 1, & + mpiR4, MPI_MIN, communicator, ierr) + endif +#endif !----------------------------------------------------------------------- @@ -1878,27 +2067,198 @@ function global_minval_scalar_int (scalar, dist) & scalar ! scalar for which min value needed type (distrb), intent(in) :: & - dist ! block distribution + dist ! block distribution integer (int_kind) :: & globalMinval ! resulting minimum value +!----------------------------------------------------------------------- +! +! local variables +! +!----------------------------------------------------------------------- + + integer (int_kind) :: & + ierr, &! mpi error flag + numProcs, &! number of processor participating + communicator ! communicator for this distribution + character(len=*), parameter :: subname = '(global_minval_scalar_int)' +!----------------------------------------------------------------------- + + call ice_distributionGet(dist, & + nprocs = numProcs, & + communicator = communicator) + !----------------------------------------------------------------------- ! -! no operations required for serial execution +! now use MPI global reduction to reduce local minval to global minval ! !----------------------------------------------------------------------- +#ifdef SERIAL_REMOVE_MPI globalMinval = scalar +#else + if (my_task < numProcs) then + call MPI_ALLREDUCE(scalar, globalMinval, 1, & + MPI_INTEGER, MPI_MIN, communicator, ierr) + endif +#endif !----------------------------------------------------------------------- end function global_minval_scalar_int +!*********************************************************************** +!*********************************************************************** + +subroutine compute_sums_dbl(array2,sums8,mpicomm,numprocs) + +! Computes the global sum of a 2-d array over fields +! with first dimension values and second dimension fields +! +! Several different options are supported. +! lsum4 = local sum with real*4 and scalar mpi allreduce, unlikely to be bfb +! lsum8 = local sum with real*8 and scalar mpi allreduce, unlikely to be bfb +! lsum16 = local sum with real*16 and scalar mpi allreduce, likely to be bfb +! WARNING: this does not work in several compilers and mpi +! implementations due to support for quad precision and consistency +! between underlying datatype in fortran and c. The source code +! can be turned off with a cpp NO_R16. Otherwise, it is recommended +! that the results be validated on any platform where it might be used. +! reprosum = fixed point method based on ordered double integer sums. +! that requires two scalar reductions per global sum. +! This is extremely likely to be bfb. +! (See Mirin and Worley, 2012, IJHPCA, 26, 1730, +! https://journals.sagepub.com/doi/10.1177/1094342011412630) +! ddpdd = parallel double-double algorithm using single scalar reduction. +! This is very likely to be bfb. +! (See He and Ding, 2001, Journal of Supercomputing, 18, 259, +! https://link.springer.com/article/10.1023%2FA%3A1008153532043) + + real (dbl_kind), dimension(:,:), intent(in) :: & + array2 ! array to be summed + + real (dbl_kind), dimension(:), intent(inout) :: & + sums8 ! resulting global sum + + integer(int_kind), intent(in) :: & + mpicomm + + integer(int_kind), intent(in) :: & + numprocs + +!----------------------------------------------------------------------- +! +! local variables +! +!----------------------------------------------------------------------- + + real (real_kind), allocatable :: psums4(:) + real (real_kind), allocatable :: sums4(:) + real (dbl_kind) , allocatable :: psums8(:) +#ifndef NO_R16 + real (r16_kind) , allocatable :: psums16(:) + real (r16_kind) , allocatable :: sums16(:) +#endif + + integer (int_kind) :: ns,nf,i,j, ierr + + character(len=*), parameter :: subname = '(compute_sums_dbl)' + +!----------------------------------------------------------------------- + + sums8 = 0._dbl_kind + ns = size(array2,dim=1) + nf = size(array2,dim=2) + + if (bfbflag == 'off' .or. bfbflag == 'lsum8') then + allocate(psums8(nf)) + psums8(:) = 0._dbl_kind + + do j = 1, nf + do i = 1, ns + psums8(j) = psums8(j) + array2(i,j) + enddo + enddo + +#ifdef SERIAL_REMOVE_MPI + sums8 = psums8 +#else + if (my_task < numProcs) then + call MPI_ALLREDUCE(psums8, sums8, nf, mpiR8, MPI_SUM, mpicomm, ierr) + endif +#endif + + deallocate(psums8) + +#ifndef NO_R16 + elseif (bfbflag == 'lsum16') then + allocate(psums16(nf)) + psums16(:) = 0._r16_kind + allocate(sums16(nf)) + sums16(:) = 0._r16_kind + + do j = 1, nf + do i = 1, ns + psums16(j) = psums16(j) + real(array2(i,j),r16_kind) + enddo + enddo + +#ifdef SERIAL_REMOVE_MPI + sums16 = psums16 +#else + if (my_task < numProcs) then + call MPI_ALLREDUCE(psums16, sums16, nf, mpiR16, MPI_SUM, mpicomm, ierr) + endif +#endif + sums8 = real(sums16,dbl_kind) + + deallocate(psums16,sums16) +#endif + + elseif (bfbflag == 'lsum4') then + allocate(psums4(nf)) + psums4(:) = 0._real_kind + allocate(sums4(nf)) + sums4(:) = 0._real_kind + + do j = 1, nf + do i = 1, ns + psums4(j) = psums4(j) + real(array2(i,j),real_kind) + enddo + enddo + +#ifdef SERIAL_REMOVE_MPI + sums4 = psums4 +#else + if (my_task < numProcs) then + call MPI_ALLREDUCE(psums4, sums4, nf, mpiR4, MPI_SUM, mpicomm, ierr) + endif +#endif + sums8 = real(sums4,dbl_kind) + + deallocate(psums4,sums4) + + elseif (bfbflag == 'ddpdd') then + if (my_task < numProcs) then + call ice_reprosum_calc(array2,sums8,ns,ns,nf,ddpdd_sum=.true.,commid=mpicomm) + endif + + elseif (bfbflag == 'reprosum') then + if (my_task < numProcs) then + call ice_reprosum_calc(array2,sums8,ns,ns,nf,ddpdd_sum=.false.,commid=mpicomm) + endif + + else + call abort_ice(subname//'ERROR: bfbflag unknown '//trim(bfbflag)) + endif + +end subroutine compute_sums_dbl + !*********************************************************************** - end module ice_global_reductions +end module ice_global_reductions !||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| diff --git a/cicecore/cicedynB/infrastructure/comm/serial/ice_reprosum.F90 b/cicecore/cicedynB/infrastructure/comm/serial/ice_reprosum.F90 new file mode 100644 index 000000000..490b16b14 --- /dev/null +++ b/cicecore/cicedynB/infrastructure/comm/serial/ice_reprosum.F90 @@ -0,0 +1,1482 @@ + +!> Reproducible sum method from P. Worley +#define SERIAL_REMOVE_MPI + +MODULE ice_reprosum + +!----------------------------------------------------------------------- +! +! Purpose: +!> Compute reproducible global sums of a set of arrays across an MPI +!> subcommunicator +! +! Methods: +!> Compute using either or both a scalable, reproducible algorithm and a +!> scalable, nonreproducible algorithm: +!> * Reproducible (scalable): +!> Convert to fixed point (integer vector representation) to enable +!> reproducibility when using MPI_Allreduce +!> * Alternative usually reproducible (scalable): +!> Use parallel double-double algorithm due to Helen He and +!> Chris Ding, based on David Bailey's/Don Knuth's DDPDD algorithm +!> * Nonreproducible (scalable): +!> Floating point and MPI_Allreduce based. +!> If computing both reproducible and nonreproducible sums, compare +!> these and report relative difference (if absolute difference +!> less than sum) or absolute difference back to calling routine. +! +! Author: P. Worley (based on suggestions from J. White for fixed +! point algorithm and on He/Ding paper for ddpdd +! algorithm) +! +! Modified by T.Craig for CICE, March 2019 based on the public version in +! Oasis3-MCT_4.0. +! +!----------------------------------------------------------------------- + +!----------------------------------------------------------------------- +!- use statements ------------------------------------------------------ +!----------------------------------------------------------------------- +#if ( defined noI8 ) + ! Workaround for when shr_kind_i8 is not supported. + use ice_kinds_mod, only: r8 => dbl_kind, i8 => int_kind +#else + use ice_kinds_mod, only: r8 => dbl_kind, i8 => int8_kind +#endif + use ice_kinds_mod, only: char_len_long + use ice_fileunits, only: nu_diag + use ice_exit, only: abort_ice + +! internal timers not yet implemented, need to revisit if needed +! use ice_mpi, only: xicex_mpi_barrier +! use ice_timer, only: xicex_timer_start, xicex_timer_stop + +!----------------------------------------------------------------------- +!- module boilerplate -------------------------------------------------- +!----------------------------------------------------------------------- + implicit none + private + +!----------------------------------------------------------------------- +!- include statements -------------------------------------------------- +!----------------------------------------------------------------------- +#ifndef SERIAL_REMOVE_MPI +#include +#endif + +!----------------------------------------------------------------------- +! Public interfaces ---------------------------------------------------- +!----------------------------------------------------------------------- + public :: & + ice_reprosum_setopts, &! set runtime options + ice_reprosum_calc, &! calculate distributed sum + ice_reprosum_tolExceeded ! utility function to check relative + ! differences against the tolerance + +!----------------------------------------------------------------------- +! Public data ---------------------------------------------------------- +!----------------------------------------------------------------------- + logical, public :: ice_reprosum_recompute = .false. + + real(r8), public :: ice_reprosum_reldiffmax = -1.0_r8 + +!----------------------------------------------------------------------- +! Private interfaces --------------------------------------------------- +!----------------------------------------------------------------------- + private :: & + ddpdd, &! double-double sum routine + split_indices ! split indices among OMP threads + +!----------------------------------------------------------------------- +! Private data --------------------------------------------------------- +!----------------------------------------------------------------------- + + logical :: repro_sum_use_ddpdd = .false. + logical :: detailed_timing = .false. + character(len=char_len_long) :: tmpstr + + CONTAINS + +!======================================================================== +!----------------------------------------------------------------------- +! Purpose: +!> Set runtime options +! Author: P. Worley +!----------------------------------------------------------------------- + + subroutine ice_reprosum_setopts(repro_sum_use_ddpdd_in, & + repro_sum_rel_diff_max_in, & + repro_sum_recompute_in, & + repro_sum_master, & + repro_sum_logunit ) + +!------------------------------Arguments-------------------------------- + logical, intent(in), optional :: repro_sum_use_ddpdd_in + !< Use DDPDD algorithm instead of fixed precision algorithm + real(r8), intent(in), optional :: repro_sum_rel_diff_max_in + !< maximum permissible difference between reproducible and + !< nonreproducible sums + logical, intent(in), optional :: repro_sum_recompute_in + !< recompute using different algorithm when difference between + !< reproducible and nonreproducible sums is too great + logical, intent(in), optional :: repro_sum_master + !< flag indicating whether this process should output + !< log messages + integer, intent(in), optional :: repro_sum_logunit + !< unit number for log messages +!---------------------------Local Workspace----------------------------- + integer llogunit ! unit number for log messages + logical master ! local master? + logical,save :: firstcall = .true. ! first call + character(len=*),parameter :: subname = '(ice_reprosum_setopts)' +!----------------------------------------------------------------------- + + if ( present(repro_sum_master) ) then + master = repro_sum_master + else + master = .false. + endif + + if ( present(repro_sum_logunit) ) then + llogunit = repro_sum_logunit + else + llogunit = nu_diag + endif + + if (.not. firstcall) then + write(tmpstr,*) subname//' ERROR: can only be called once' + call abort_ice(tmpstr,file=__FILE__,line=__LINE__) + endif + firstcall = .false. + + if ( present(repro_sum_use_ddpdd_in) ) then + repro_sum_use_ddpdd = repro_sum_use_ddpdd_in + endif + if ( present(repro_sum_rel_diff_max_in) ) then + ice_reprosum_reldiffmax = repro_sum_rel_diff_max_in + endif + if ( present(repro_sum_recompute_in) ) then + ice_reprosum_recompute = repro_sum_recompute_in + endif + if (master) then + if ( repro_sum_use_ddpdd ) then + write(llogunit,*) subname, & + 'Using double-double-based (scalable) usually reproducible ', & + 'distributed sum algorithm' + else + write(llogunit,*) subname, & + 'Using fixed-point-based (scalable) reproducible ', & + 'distributed sum algorithm' + endif + + if (ice_reprosum_reldiffmax >= 0._r8) then + write(llogunit,*) subname, & + ' with a maximum relative error tolerance of ', & + ice_reprosum_reldiffmax + if (ice_reprosum_recompute) then + write(llogunit,*) subname, & + 'If tolerance exceeded, sum is recomputed using ', & + 'a serial algorithm.' + else + write(llogunit,*) subname, & + 'If tolerance exceeded, fixed-precision is sum used ', & + 'but a warning is output.' + endif + else + write(llogunit,*) subname, & + 'and not comparing with floating point algorithms.' + endif + + endif + end subroutine ice_reprosum_setopts + +!======================================================================== + +! +! Purpose: +!> Compute the global sum of each field in "arr" using the indicated +!> communicator with a reproducible yet scalable implementation based +!> on a fixed point algorithm. An alternative is to use an "almost +!> always reproducible" floating point algorithm. +! +! The accuracy of the fixed point algorithm is controlled by the +! number of "levels" of integer expansion. The algorithm will calculate +! the number of levels that is required for the sum to be essentially +! exact. The optional parameter arr_max_levels can be used to override +! the calculated value. The optional parameter arr_max_levels_out can be +! used to return the values used. +! +! The algorithm also requires an upper bound on +! the maximum summand (in absolute value) for each field, and will +! calculate this internally. However, if the optional parameters +! arr_max_levels and arr_gbl_max are both set, then the algorithm will +! use the values in arr_gbl_max for the upper bounds instead. If these +! are not upper bounds, or if the upper bounds are not tight enough +! to achieve the requisite accuracy, and if the optional parameter +! repro_sum_validate is NOT set to .false., the algorithm will repeat the +! computation with appropriate upper bounds. If only arr_gbl_max is present, +! then the maxima are computed internally (and the specified values are +! ignored). The optional parameter arr_gbl_max_out can be +! used to return the values used. +! +! Finally, the algorithm requires an upper bound on the number of +! local summands across all processes. This will be calculated internally, +! using an MPI collective, but the value in the optional argument +! gbl_max_nsummands will be used instead if (1) it is present, (2) +! it is > 0, and (3) the maximum value and required number of levels +! are also specified. (If the maximum value is calculated, the same +! MPI collective is used to determine the maximum number of local +! summands.) The accuracy of the user-specified value is not checked. +! However, if set to < 1, the value will instead be calculated. If the +! optional parameter gbl_max_nsummands_out is present, then the value +! used (gbl_max_nsummands if >= 1; calculated otherwise) will be +! returned. +! +! If requested (by setting ice_reprosum_reldiffmax >= 0.0 and passing in +! the optional rel_diff parameter), results are compared with a +! nonreproducible floating point algorithm. +! +! Note that the cost of the algorithm is not strongly correlated with +! the number of levels, which primarily shows up as a (modest) increase +! in cost of the MPI_Allreduce as a function of vector length. Rather the +! cost is more a function of (a) the number of integers required to +! represent an individual summand and (b) the number of MPI_Allreduce +! calls. The number of integers required to represent an individual +! summand is 1 or 2 when using 8-byte integers for 8-byte real summands +! when the number of local summands is not too large. As the number of +! local summands increases, the number of integers required increases. +! The number of MPI_Allreduce calls is either 2 (specifying nothing) or +! 1 (specifying gbl_max_nsummands, arr_max_levels, and arr_gbl_max +! correctly). When specifying arr_max_levels and arr_gbl_max +! incorrectly, 3 or 4 MPI_Allreduce calls will be required. +! +! The alternative algorithm is a minor modification of a parallel +! implementation of David Bailey's routine DDPDD by Helen He +! and Chris Ding. Bailey uses the Knuth trick to implement quadruple +! precision summation of double precision values with 10 double +! precision operations. The advantage of this algorithm is that +! it requires a single MPI_Allreduce and is less expensive per summand +! than is the fixed precision algorithm. The disadvantage is that it +! is not guaranteed to be reproducible (though it is reproducible +! much more often than is the standard algorithm). This alternative +! is used when the optional parameter ddpdd_sum is set to .true. It is +! also used if the fixed precision algorithm radix assumption does not +! hold. + +!---------------------------------------------------------------------- + subroutine ice_reprosum_calc (arr, arr_gsum, nsummands, dsummands, & + nflds, ddpdd_sum, & + arr_gbl_max, arr_gbl_max_out, & + arr_max_levels, arr_max_levels_out, & + gbl_max_nsummands, gbl_max_nsummands_out,& + gbl_count, repro_sum_validate, & + repro_sum_stats, rel_diff, commid ) +!---------------------------------------------------------------------- + +! Arguments + + integer, intent(in) :: nsummands !< number of local summands + integer, intent(in) :: dsummands !< declared first dimension + integer, intent(in) :: nflds !< number of fields + real(r8), intent(in) :: arr(dsummands,nflds) + !< input array + + real(r8), intent(out):: arr_gsum(nflds) + !< global means + + logical, intent(in), optional :: ddpdd_sum + !< use ddpdd algorithm instead + !< of fixed precision algorithm + + real(r8), intent(in), optional :: arr_gbl_max(nflds) + !< upper bound on max(abs(arr)) + + real(r8), intent(out), optional :: arr_gbl_max_out(nflds) + !< calculated upper bound on + !< max(abs(arr)) + + integer, intent(in), optional :: arr_max_levels(nflds) + !< maximum number of levels of + !< integer expansion to use + + integer, intent(out), optional :: arr_max_levels_out(nflds) + !< output of number of levels of + !< integer expansion to used + + integer, intent(in), optional :: gbl_max_nsummands + !< maximum of nsummand over all + !< processes + + integer, intent(out), optional :: gbl_max_nsummands_out + !< calculated maximum nsummands + !< over all processes + + integer, intent(in), optional :: gbl_count + !< was total number of summands; + !< now is ignored; use + !< gbl_max_nsummands instead + + logical, intent(in), optional :: repro_sum_validate + !< flag enabling/disabling testing that gmax and max_levels are + !< accurate/sufficient. Default is enabled. + + integer, intent(inout), optional :: repro_sum_stats(5) + !< increment running totals for + !< (1) one-reduction repro_sum + !< (2) two-reduction repro_sum + !< (3) both types in one call + !< (4) nonrepro_sum + !< (5) global max nsummands reduction + + real(r8), intent(out), optional :: rel_diff(2,nflds) + !< relative and absolute + !< differences between fixed + !< and floating point sums + + integer, intent(in), optional :: commid + !< MPI communicator + +! Local workspace + + logical :: use_ddpdd_sum ! flag indicating whether to + ! use ice_reprosum_ddpdd or not + logical :: recompute ! flag indicating need to + ! determine gmax/gmin before + ! computing sum + logical :: validate ! flag indicating need to + ! verify gmax and max_levels + ! are accurate/sufficient + integer :: omp_nthreads ! number of OpenMP threads + integer :: mpi_comm ! MPI subcommunicator + integer :: tasks ! number of MPI processes + integer :: mype ! MPI task rank + integer :: ierr ! MPI error return + integer :: ifld, isum, ithread ! loop variables + integer :: max_nsummands ! max nsummands over all processes + ! or threads (used in both ways) + + integer, allocatable :: isum_beg(:), isum_end(:) + ! range of summand indices for each + ! OpenMP thread + integer, allocatable :: arr_tlmin_exp(:,:) + ! per thread local exponent minima + integer, allocatable :: arr_tlmax_exp(:,:) + ! per thread local exponent maxima + integer :: arr_exp, arr_exp_tlmin, arr_exp_tlmax + ! summand exponent and working min/max + integer :: arr_lmin_exp(nflds) ! local exponent minima + integer :: arr_lmax_exp(nflds) ! local exponent maxima + integer :: arr_lextremes(0:nflds,2)! local exponent extrema + integer :: arr_gextremes(0:nflds,2)! global exponent extrema + + integer :: arr_gmax_exp(nflds) ! global exponents maxima + integer :: arr_gmin_exp(nflds) ! global exponents minima + integer :: arr_max_shift ! maximum safe exponent for + ! value < 1 (so that sum does + ! not overflow) + integer :: max_levels(nflds) ! maximum number of levels of + ! integer expansion to use + integer :: max_level ! maximum value in max_levels + integer :: gbl_max_red ! global max local sum reduction? (0/1) + integer :: repro_sum_fast ! 1 reduction repro_sum? (0/1) + integer :: repro_sum_slow ! 2 reduction repro_sum? (0/1) + integer :: repro_sum_both ! both fast and slow? (0/1) + integer :: nonrepro_sum ! nonrepro_sum? (0/1) + + real(r8) :: xmax_nsummands ! dble of max_nsummands + real(r8) :: arr_lsum(nflds) ! local sums + real(r8) :: arr_gsum_fast(nflds) ! global sum calculated using + ! fast, nonreproducible, + ! floating point alg. + real(r8) :: abs_diff ! absolute difference between + ! fixed and floating point + ! sums +#ifdef _OPENMP + integer omp_get_max_threads + external omp_get_max_threads +#endif + character(len=*),parameter :: subname = '(ice_reprosum_calc)' + +!----------------------------------------------------------------------- + +! check whether should use ice_reprosum_ddpdd algorithm + use_ddpdd_sum = repro_sum_use_ddpdd + if ( present(ddpdd_sum) ) then + use_ddpdd_sum = ddpdd_sum + endif + +! check whether intrinsic-based algorithm will work on this system +! (requires floating point and integer bases to be the same) +! If not, always use ddpdd. + use_ddpdd_sum = use_ddpdd_sum .or. (radix(0._r8) /= radix(0_i8)) + +! initialize local statistics variables + gbl_max_red = 0 + repro_sum_fast = 0 + repro_sum_slow = 0 + repro_sum_both = 0 + nonrepro_sum = 0 + +! set MPI communicator + if ( present(commid) ) then + mpi_comm = commid + else +#ifdef SERIAL_REMOVE_MPI + mpi_comm = 0 +#else + mpi_comm = MPI_COMM_WORLD +#endif + endif + +! if (detailed_timing) then +! call xicex_timer_start('xicex_reprosum_prebarrier') +! call xicex_mpi_barrier(mpi_comm,subname) +! call xicex_timer_stop ('xicex_reprosum_prebarrier') +! endif + + if ( use_ddpdd_sum ) then + +! if (detailed_timing) call xicex_timer_start('ice_reprosum_ddpdd') + + call ice_reprosum_ddpdd(arr, arr_gsum, nsummands, dsummands, & + nflds, mpi_comm) + repro_sum_fast = 1 + +! if (detailed_timing) call xicex_timer_stop('ice_reprosum_ddpdd') + + else + +! if (detailed_timing) call xicex_timer_start('ice_reprosum_int') + +! get number of MPI tasks +#ifdef SERIAL_REMOVE_MPI + tasks = 1 + mype = 0 +#else + call mpi_comm_size(mpi_comm, tasks, ierr) + call mpi_comm_rank(mpi_comm, mype, ierr) +#endif + +! get number of OpenMP threads +#ifdef _OPENMP + omp_nthreads = omp_get_max_threads() +#else + omp_nthreads = 1 +#endif + +! see if have sufficient information to not require max/min allreduce + recompute = .true. + validate = .false. + if ( present(arr_gbl_max) .and. present(arr_max_levels) ) then + recompute = .false. + +! setting lower bound on max_level*nflds to be 64 to improve OpenMP +! performance for loopb in ice_reprosum_int + max_level = (64/nflds) + 1 + do ifld=1,nflds + if ((arr_gbl_max(ifld) .ge. 0.0_r8) .and. & + (arr_max_levels(ifld) > 0)) then + + arr_gmax_exp(ifld) = exponent(arr_gbl_max(ifld)) + if (max_level < arr_max_levels(ifld)) & + max_level = arr_max_levels(ifld) + + else + recompute = .true. + endif + enddo + + if (.not. recompute) then + +! determine maximum number of summands in local phases of the +! algorithm +! if (detailed_timing) call xicex_timer_start("repro_sum_allr_max") + if ( present(gbl_max_nsummands) ) then + if (gbl_max_nsummands < 1) then +#ifdef SERIAL_REMOVE_MPI + max_nsummands = nsummands +#else + call mpi_allreduce (nsummands, max_nsummands, 1, & + MPI_INTEGER, MPI_MAX, mpi_comm, ierr) +#endif + gbl_max_red = 1 + else + max_nsummands = gbl_max_nsummands + endif + else +#ifdef SERIAL_REMOVE_MPI + max_nsummands = nsummands +#else + call mpi_allreduce (nsummands, max_nsummands, 1, & + MPI_INTEGER, MPI_MAX, mpi_comm, ierr) +#endif + gbl_max_red = 1 + endif +! if (detailed_timing) call xicex_timer_stop("repro_sum_allr_max") + +! determine maximum shift. Shift needs to be small enough that summation +! does not exceed maximum number of digits in i8. + +! if requested, return max_nsummands before it is redefined + if ( present( gbl_max_nsummands_out) ) then + gbl_max_nsummands_out = max_nsummands + endif + +! summing within each thread first + max_nsummands = (max_nsummands/omp_nthreads) + 1 +! then over threads and tasks + max_nsummands = max(max_nsummands, tasks*omp_nthreads) + + xmax_nsummands = dble(max_nsummands) + arr_max_shift = digits(0_i8) - (exponent(xmax_nsummands) + 1) + if (arr_max_shift < 2) then + write(tmpstr,*) subname//' ERROR: number of summands too large for fixed precision algorithm' + call abort_ice(tmpstr,file=__FILE__,line=__LINE__) + endif + +! calculate sum + if (present(repro_sum_validate)) then + validate = repro_sum_validate + else + validate = .true. + endif + call ice_reprosum_int(arr, arr_gsum, nsummands, dsummands, & + nflds, arr_max_shift, arr_gmax_exp, & + arr_max_levels, max_level, validate, & + recompute, omp_nthreads, mpi_comm) + +! record statistics, etc. + repro_sum_fast = 1 + if (recompute) then + repro_sum_both = 1 + else +! if requested, return specified levels and upper bounds on maxima + if ( present(arr_max_levels_out) ) then + do ifld=1,nflds + arr_max_levels_out(ifld) = arr_max_levels(ifld) + enddo + endif + if ( present(arr_gbl_max_out) ) then + do ifld=1,nflds + arr_gbl_max_out(ifld) = arr_gbl_max(ifld) + enddo + endif + endif + endif + endif + +! do not have sufficient information; calculate global max/min and +! use to compute required number of levels + if (recompute) then + +! record statistic + repro_sum_slow = 1 + +! determine maximum and minimum (non-zero) summand values and +! maximum number of local summands + +! allocate thread-specific work space + allocate(arr_tlmax_exp(nflds,omp_nthreads)) + allocate(arr_tlmin_exp(nflds,omp_nthreads)) + allocate(isum_beg(omp_nthreads)) + allocate(isum_end(omp_nthreads)) + +! split summand index range over OpenMP threads + call split_indices(nsummands, omp_nthreads, isum_beg, isum_end) + +!$omp parallel do & +!$omp default(shared) & +!$omp private(ithread, ifld, isum, arr_exp, arr_exp_tlmin, arr_exp_tlmax) + do ithread=1,omp_nthreads +! if (detailed_timing) call xicex_timer_start('repro_sum_loopa') + do ifld=1,nflds + arr_exp_tlmin = MAXEXPONENT(1._r8) + arr_exp_tlmax = MINEXPONENT(1._r8) + do isum=isum_beg(ithread),isum_end(ithread) + if (arr(isum,ifld) .ne. 0.0_r8) then + arr_exp = exponent(arr(isum,ifld)) + arr_exp_tlmin = min(arr_exp,arr_exp_tlmin) + arr_exp_tlmax = max(arr_exp,arr_exp_tlmax) + endif + end do + arr_tlmin_exp(ifld,ithread) = arr_exp_tlmin + arr_tlmax_exp(ifld,ithread) = arr_exp_tlmax + end do +! if (detailed_timing) call xicex_timer_stop('repro_sum_loopa') + end do + + do ifld=1,nflds + arr_lmax_exp(ifld) = maxval(arr_tlmax_exp(ifld,:)) + arr_lmin_exp(ifld) = minval(arr_tlmin_exp(ifld,:)) + end do + deallocate(arr_tlmin_exp,arr_tlmax_exp,isum_beg,isum_end) + + arr_lextremes(0,:) = -nsummands + arr_lextremes(1:nflds,1) = -arr_lmax_exp(:) + arr_lextremes(1:nflds,2) = arr_lmin_exp(:) +! if (detailed_timing) call xicex_timer_start("repro_sum_allr_minmax") +#ifdef SERIAL_REMOVE_MPI + arr_gextremes = arr_lextremes +#else + call mpi_allreduce (arr_lextremes, arr_gextremes, 2*(nflds+1), & + MPI_INTEGER, MPI_MIN, mpi_comm, ierr) +#endif +! if (detailed_timing) call xicex_timer_stop("repro_sum_allr_minmax") + max_nsummands = -arr_gextremes(0,1) + arr_gmax_exp(:) = -arr_gextremes(1:nflds,1) + arr_gmin_exp(:) = arr_gextremes(1:nflds,2) + +! if a field is identically zero, arr_gmin_exp still equals MAXEXPONENT +! and arr_gmax_exp still equals MINEXPONENT. In this case, set +! arr_gmin_exp = arr_gmax_exp = MINEXPONENT + do ifld=1,nflds + arr_gmin_exp(ifld) = min(arr_gmax_exp(ifld),arr_gmin_exp(ifld)) + enddo + +! if requested, return upper bounds on observed maxima + if ( present(arr_gbl_max_out) ) then + do ifld=1,nflds + arr_gbl_max_out(ifld) = scale(1.0_r8,arr_gmax_exp(ifld)) + enddo + endif + +! if requested, return max_nsummands before it is redefined + if ( present( gbl_max_nsummands_out) ) then + gbl_max_nsummands_out = max_nsummands + endif + +! determine maximum shift (same as in previous branch, but with calculated +! max_nsummands). Shift needs to be small enough that summation does not +! exceed maximum number of digits in i8. + +! summing within each thread first + max_nsummands = (max_nsummands/omp_nthreads) + 1 +! then over threads and tasks + max_nsummands = max(max_nsummands, tasks*omp_nthreads) + + xmax_nsummands = dble(max_nsummands) + arr_max_shift = digits(0_i8) - (exponent(xmax_nsummands) + 1) + if (arr_max_shift < 2) then + write(tmpstr,*) subname//' ERROR: number of summands too large for fixed precision algorithm' + call abort_ice(tmpstr,file=__FILE__,line=__LINE__) + endif + +! determine maximum number of levels required for each field +! ((digits(0_i8) + (arr_gmax_exp(ifld)-arr_gmin_exp(ifld))) / arr_max_shift) +! + 1 because first truncation probably does not involve a maximal shift +! + 1 to guarantee that the integer division rounds up (not down) +! (setting lower bound on max_level*nflds to be 64 to improve OpenMP +! performance for loopb in ice_reprosum_int) + max_level = (64/nflds) + 1 + do ifld=1,nflds + max_levels(ifld) = 2 + & + ((digits(0_i8) + (arr_gmax_exp(ifld)-arr_gmin_exp(ifld))) & + / arr_max_shift) + if ( present(arr_max_levels) .and. (.not. validate) ) then +! if validate true, then computation with arr_max_levels failed +! previously + if ( arr_max_levels(ifld) > 0 ) then + max_levels(ifld) = & + min(arr_max_levels(ifld),max_levels(ifld)) + endif + endif + if (max_level < max_levels(ifld)) & + max_level = max_levels(ifld) + enddo + +! if requested, return calculated levels + if ( present(arr_max_levels_out) ) then + do ifld=1,nflds + arr_max_levels_out(ifld) = max_levels(ifld) + enddo + endif + +! calculate sum + validate = .false. + call ice_reprosum_int(arr, arr_gsum, nsummands, dsummands, nflds, & + arr_max_shift, arr_gmax_exp, max_levels, & + max_level, validate, recompute, & + omp_nthreads, mpi_comm) + + endif + +! if (detailed_timing) call xicex_timer_stop('ice_reprosum_int') + + endif + +! compare fixed and floating point results + if ( present(rel_diff) ) then + if (ice_reprosum_reldiffmax >= 0.0_r8) then + +! if (detailed_timing) then +! call xicex_timer_start('xicex_nonreprosum_prebarrier') +! call xicex_mpi_barrier(mpi_comm,subname) +! call xicex_timer_stop ('xicex_nonreprosum_prebarrier') +! endif + +! if (detailed_timing) call xicex_timer_start('nonrepro_sum') +! record statistic + nonrepro_sum = 1 +! compute nonreproducible sum + arr_lsum(:) = 0._r8 +!$omp parallel do & +!$omp default(shared) & +!$omp private(ifld, isum) + do ifld=1,nflds + do isum=1,nsummands + arr_lsum(ifld) = arr(isum,ifld) + arr_lsum(ifld) + end do + end do + +#ifdef SERIAL_REMOVE_MPI + arr_gsum_fast = arr_lsum +#else + call mpi_allreduce (arr_lsum, arr_gsum_fast, nflds, & + MPI_REAL8, MPI_SUM, mpi_comm, ierr) +#endif + +! if (detailed_timing) call xicex_timer_stop('nonrepro_sum') + +! determine differences +!$omp parallel do & +!$omp default(shared) & +!$omp private(ifld, abs_diff) + do ifld=1,nflds + abs_diff = abs(arr_gsum_fast(ifld)-arr_gsum(ifld)) + if (abs(arr_gsum(ifld)) > abs_diff) then + rel_diff(1,ifld) = abs_diff/abs(arr_gsum(ifld)) + else + rel_diff(1,ifld) = abs_diff + endif + rel_diff(2,ifld) = abs_diff + enddo + else + rel_diff(:,:) = 0.0_r8 + endif + endif + +! return statistics + if ( present(repro_sum_stats) ) then + repro_sum_stats(1) = repro_sum_stats(1) + repro_sum_fast + repro_sum_stats(2) = repro_sum_stats(2) + repro_sum_slow + repro_sum_stats(3) = repro_sum_stats(3) + repro_sum_both + repro_sum_stats(4) = repro_sum_stats(4) + nonrepro_sum + repro_sum_stats(5) = repro_sum_stats(5) + gbl_max_red + endif + + + end subroutine ice_reprosum_calc + +!======================================================================== +! +! Purpose: +!> Compute the global sum of each field in "arr" using the indicated +!> communicator with a reproducible yet scalable implementation based +!> on a fixed point algorithm. The accuracy of the fixed point algorithm +!> is controlled by the number of "levels" of integer expansion, the +!> maximum value of which is specified by max_level. +! +!---------------------------------------------------------------------- + + subroutine ice_reprosum_int (arr, arr_gsum, nsummands, dsummands, nflds, & + arr_max_shift, arr_gmax_exp, max_levels, & + max_level, validate, recompute, & + omp_nthreads, mpi_comm ) + +!---------------------------------------------------------------------- + +! Arguments + + integer, intent(in) :: nsummands !< number of local summands + integer, intent(in) :: dsummands !< declared first dimension + integer, intent(in) :: nflds !< number of fields + integer, intent(in) :: arr_max_shift !< maximum safe exponent for + !< value < 1 (so that sum + !< does not overflow) + integer, intent(in) :: arr_gmax_exp(nflds) + !< exponents of global maxima + integer, intent(in) :: max_levels(nflds) + !< maximum number of levels + !< of integer expansion + integer, intent(in) :: max_level !< maximum value in + !< max_levels + integer, intent(in) :: omp_nthreads !< number of OpenMP threads + integer, intent(in) :: mpi_comm !< MPI subcommunicator + + real(r8), intent(in) :: arr(dsummands,nflds) + !< input array + + logical, intent(in):: validate + !< flag indicating that accuracy of solution generated from + !< arr_gmax_exp and max_levels should be tested + + logical, intent(out):: recompute + !< flag indicating that either the upper bounds are inaccurate, + !< or max_levels and arr_gmax_exp do not generate accurate + !< enough sums + + real(r8), intent(out):: arr_gsum(nflds) !< global means + +! Local workspace + + integer, parameter :: max_jlevel = & + 1 + (digits(0_i8)/digits(0.0_r8)) + + integer(i8) :: i8_arr_tlsum_level(0:max_level,nflds,omp_nthreads) + ! integer vector representing local + ! sum (per thread, per field) + integer(i8) :: i8_arr_lsum_level((max_level+3)*nflds) + ! integer vector representing local + ! sum + integer(i8) :: i8_arr_level ! integer part of summand for current + ! expansion level + integer(i8) :: i8_arr_gsum_level((max_level+3)*nflds) + ! integer vector representing global + ! sum + integer(i8) :: IX_8 ! integer representation of current + ! jlevels of X_8 ('part' of + ! i8_arr_gsum_level) + integer(i8) :: i8_sign ! sign global sum + integer(i8) :: i8_radix ! radix for i8 variables + + integer :: max_error(nflds,omp_nthreads) + ! accurate upper bound on data? + integer :: not_exact(nflds,omp_nthreads) + ! max_levels sufficient to + ! capture all digits? + integer :: isum_beg(omp_nthreads), isum_end(omp_nthreads) + ! range of summand indices for each + ! OpenMP thread + integer :: ifld, isum, ithread + ! loop variables + integer :: arr_exp ! exponent of summand + integer :: arr_shift ! exponent used to generate integer + ! for current expansion level + integer :: ilevel ! current integer expansion level + integer :: offset(nflds) ! beginning location in + ! i8_arr_{g,l}sum_level for integer + ! expansion of current ifld + integer :: voffset ! modification to offset used to + ! include validation metrics + integer :: ioffset ! offset(ifld) + integer :: jlevel ! number of floating point 'pieces' + ! extracted from a given i8 integer + integer :: ierr ! MPI error return + integer :: LX(max_jlevel) ! exponent of X_8 (see below) + integer :: veclth ! total length of i8_arr_lsum_level + integer :: sum_digits ! lower bound on number of significant + ! in integer expansion of sum + integer :: curr_exp ! exponent of partial sum during + ! reconstruction from integer vector + integer :: corr_exp ! exponent of current summand in + ! reconstruction from integer vector + + real(r8) :: arr_frac ! fraction of summand + real(r8) :: arr_remainder ! part of summand remaining after + ! current level of integer expansion + real(r8) :: X_8(max_jlevel) ! r8 vector representation of current + ! i8_arr_gsum_level + real(r8) :: RX_8 ! r8 representation of difference + ! between current i8_arr_gsum_level + ! and current jlevels of X_8 + ! (== IX_8). Also used in final + ! scaling step + + logical :: first ! flag used to indicate that just + ! beginning reconstruction of sum + ! from integer vector + + character(len=*),parameter :: subname = '(ice_reprosum_int)' + +!----------------------------------------------------------------------- +! Save radix of i8 variables in an i8 variable + i8_radix = radix(IX_8) + +! If validating upper bounds, reserve space for validation metrics +! In both cases, reserve an extra level for overflow from the top level + if (validate) then + voffset = 3 + else + voffset = 1 + endif + +! compute offsets for each field + offset(1) = voffset + do ifld=2,nflds + offset(ifld) = offset(ifld-1) & + + (max_levels(ifld-1) + voffset) + enddo + veclth = offset(nflds) + max_levels(nflds) + +! split summand index range over OpenMP threads + call split_indices(nsummands, omp_nthreads, isum_beg, isum_end) + +! convert local summands to vector of integers and sum +! (Using scale instead of set_exponent because arr_remainder may not be +! "normal" after level 1 calculation) + i8_arr_lsum_level(:) = 0_i8 + +!$omp parallel do & +!$omp default(shared) & +!$omp private(ithread, ifld, ioffset, isum, arr_frac, arr_exp, & +!$omp arr_shift, ilevel, i8_arr_level, arr_remainder, RX_8, IX_8) + do ithread=1,omp_nthreads +! if (detailed_timing) call xicex_timer_start('repro_sum_loopb') + do ifld=1,nflds + ioffset = offset(ifld) + + max_error(ifld,ithread) = 0 + not_exact(ifld,ithread) = 0 + + i8_arr_tlsum_level(:,ifld,ithread) = 0_i8 + do isum=isum_beg(ithread),isum_end(ithread) + arr_remainder = 0.0_r8 + + if (arr(isum,ifld) .ne. 0.0_r8) then + arr_exp = exponent(arr(isum,ifld)) + arr_frac = fraction(arr(isum,ifld)) + +! test that global maximum upper bound is an upper bound + if (arr_exp > arr_gmax_exp(ifld)) then + max_error(ifld,ithread) = 1 + exit + endif + +! calculate first shift + arr_shift = arr_max_shift - (arr_gmax_exp(ifld)-arr_exp) + +! determine first (probably) nonzero level (assuming initial fraction is +! 'normal' - algorithm still works if this is not true) +! NOTE: this is critical; scale will set to zero if min exponent is too small. + if (arr_shift < 1) then + ilevel = (1 + (arr_gmax_exp(ifld)-arr_exp))/arr_max_shift + arr_shift = ilevel*arr_max_shift - (arr_gmax_exp(ifld)-arr_exp) + + do while (arr_shift < 1) + arr_shift = arr_shift + arr_max_shift + ilevel = ilevel + 1 + enddo + else + ilevel = 1 + endif + + if (ilevel .le. max_levels(ifld)) then +! apply first shift/truncate, add it to the relevant running +! sum, and calculate the remainder. + arr_remainder = scale(arr_frac,arr_shift) + i8_arr_level = int(arr_remainder,i8) + i8_arr_tlsum_level(ilevel,ifld,ithread) = & + i8_arr_tlsum_level(ilevel,ifld,ithread) + i8_arr_level + arr_remainder = arr_remainder - i8_arr_level + +! while the remainder is non-zero, continue to shift, truncate, +! sum, and calculate new remainder + do while ((arr_remainder .ne. 0.0_r8) & + .and. (ilevel < max_levels(ifld))) + ilevel = ilevel + 1 + arr_remainder = scale(arr_remainder,arr_max_shift) + i8_arr_level = int(arr_remainder,i8) + i8_arr_tlsum_level(ilevel,ifld,ithread) = & + i8_arr_tlsum_level(ilevel,ifld,ithread) + i8_arr_level + arr_remainder = arr_remainder - i8_arr_level + enddo + + endif + endif + + if (arr_remainder .ne. 0.0_r8) then + not_exact(ifld,ithread) = 1 + endif + + enddo + +! postprocess integer vector to eliminate potential for overlap in the following +! sums over threads and processes: if value larger than or equal to +! (radix(IX_8)**arr_max_shift), add this 'overlap' to next larger integer in +! vector, resulting in nonoverlapping ranges for each component. Note that +! "ilevel-1==0" corresponds to an extra level used to guarantee that the sums +! over threads and processes do not overflow for ilevel==1. + do ilevel=max_levels(ifld),1,-1 + RX_8 = i8_arr_tlsum_level(ilevel,ifld,ithread) + IX_8 = int(scale(RX_8,-arr_max_shift),i8) + if (IX_8 .ne. 0_i8) then + i8_arr_tlsum_level(ilevel-1,ifld,ithread) = & + i8_arr_tlsum_level(ilevel-1,ifld,ithread) + IX_8 + IX_8 = IX_8*(i8_radix**arr_max_shift) + i8_arr_tlsum_level(ilevel,ifld,ithread) = & + i8_arr_tlsum_level(ilevel,ifld,ithread) - IX_8 + endif + enddo + enddo +! if (detailed_timing) call xicex_timer_stop('repro_sum_loopb') + enddo + +! sum contributions from different threads + do ifld=1,nflds + ioffset = offset(ifld) + do ithread = 1,omp_nthreads + do ilevel = 0,max_levels(ifld) + i8_arr_lsum_level(ioffset+ilevel) = & + i8_arr_lsum_level(ioffset+ilevel) & + + i8_arr_tlsum_level(ilevel,ifld,ithread) + enddo + enddo + enddo + +! record if upper bound was inaccurate or if level expansion stopped +! before full accuracy was achieved + if (validate) then + do ifld=1,nflds + ioffset = offset(ifld) + i8_arr_lsum_level(ioffset-voffset+1) = maxval(max_error(ifld,:)) + i8_arr_lsum_level(ioffset-voffset+2) = maxval(not_exact(ifld,:)) + enddo + endif + +! sum integer vector element-wise +#ifdef SERIAL_REMOVE_MPI + i8_arr_gsum_level = i8_arr_lsum_level +#else +#if ( defined noI8 ) + ! Workaround for when i8 is not supported. +! if (detailed_timing) call xicex_timer_start("repro_sum_allr_i4") + call mpi_allreduce (i8_arr_lsum_level, i8_arr_gsum_level, & + veclth, MPI_INTEGER, MPI_SUM, mpi_comm, ierr) +! if (detailed_timing) call xicex_timer_stop("repro_sum_allr_i4") +#else +! if (detailed_timing) call xicex_timer_start("repro_sum_allr_i8") + call mpi_allreduce (i8_arr_lsum_level, i8_arr_gsum_level, & + veclth, MPI_INTEGER8, MPI_SUM, mpi_comm, ierr) +! if (detailed_timing) call xicex_timer_stop("repro_sum_allr_i8") +#endif +#endif + +! Construct global sum from integer vector representation: +! 1) arr_max_shift is the shift applied to fraction(arr_gmax) . +! When shifting back, need to "add back in" true arr_gmax exponent. This was +! removed implicitly by working only with the fraction . +! 2) want to add levels into sum in reverse order (smallest to largest). However, +! even this can generate floating point rounding errors if signs of integers +! alternate. To avoid this, do some arithmetic with integer vectors so that all +! components have the same sign. This should keep relative difference between +! using different integer sizes (e.g. i8 and i4) to machine epsilon +! 3) assignment to X_8 will usually lose accuracy since maximum integer +! size is greater than the max number of 'digits' in r8 value (if xmax_nsummands +! correction not very large). Calculate remainder and add in first (since +! smaller). One correction is sufficient for r8 (53 digits) and i8 (63 digits). +! For r4 (24 digits) may need to correct twice. Code is written in a general +! fashion, to work no matter how many corrections are necessary (assuming +! max_jlevel parameter calculation is correct). + + recompute = .false. + do ifld=1,nflds + arr_gsum(ifld) = 0.0_r8 + ioffset = offset(ifld) + +! if validate is .true., test whether the summand upper bound +! was exceeded on any of the processes + if (validate) then + if (i8_arr_gsum_level(ioffset-voffset+1) .ne. 0_i8) then + recompute = .true. + endif + endif + + if (.not. recompute) then + +! preprocess integer vector: +! a) if value larger than or equal to (radix(IX_8)**arr_max_shift), add this 'overlap' +! to next larger integer in vector, resulting in nonoverlapping ranges for each +! component. Note that have "ilevel-1=0" level here as described above. + do ilevel=max_levels(ifld),1,-1 + RX_8 = i8_arr_gsum_level(ioffset+ilevel) + IX_8 = int(scale(RX_8,-arr_max_shift),i8) + if (IX_8 .ne. 0_i8) then + i8_arr_gsum_level(ioffset+ilevel-1) = i8_arr_gsum_level(ioffset+ilevel-1) & + + IX_8 + IX_8 = IX_8*(i8_radix**arr_max_shift) + i8_arr_gsum_level(ioffset+ilevel) = i8_arr_gsum_level(ioffset+ilevel) & + - IX_8 + endif + enddo +! b) subtract +/- 1 from larger and add +/- 1 to smaller when necessary +! so that all vector components have the same sign (eliminating loss +! of accuracy arising from difference of large values when +! reconstructing r8 sum from integer vector) + ilevel = 0 + do while ((i8_arr_gsum_level(ioffset+ilevel) .eq. 0_i8) & + .and. (ilevel < max_levels(ifld))) + ilevel = ilevel + 1 + enddo +! + if (ilevel < max_levels(ifld)) then + if (i8_arr_gsum_level(ioffset+ilevel) > 0_i8) then + i8_sign = 1_i8 + else + i8_sign = -1_i8 + endif + do jlevel=ilevel,max_levels(ifld)-1 + if (sign(1_i8,i8_arr_gsum_level(ioffset+jlevel)) & + .ne. sign(1_i8,i8_arr_gsum_level(ioffset+jlevel+1))) then + i8_arr_gsum_level(ioffset+jlevel) = i8_arr_gsum_level(ioffset+jlevel) & + - i8_sign + i8_arr_gsum_level(ioffset+jlevel+1) = i8_arr_gsum_level(ioffset+jlevel+1) & + + i8_sign*(i8_radix**arr_max_shift) + endif + enddo + endif + +! start with maximum shift, and work up to larger values + arr_shift = arr_gmax_exp(ifld) & + - max_levels(ifld)*arr_max_shift + curr_exp = 0 + first = .true. + do ilevel=max_levels(ifld),0,-1 + + if (i8_arr_gsum_level(ioffset+ilevel) .ne. 0_i8) then + jlevel = 1 + +! r8 representation of higher order bits in integer + X_8(jlevel) = i8_arr_gsum_level(ioffset+ilevel) + LX(jlevel) = exponent(X_8(jlevel)) + +! calculate remainder + IX_8 = int(X_8(jlevel),i8) + RX_8 = (i8_arr_gsum_level(ioffset+ilevel) - IX_8) + +! repeat using remainder + do while (RX_8 .ne. 0.0_r8) + jlevel = jlevel + 1 + X_8(jlevel) = RX_8 + LX(jlevel) = exponent(RX_8) + IX_8 = IX_8 + int(RX_8,i8) + RX_8 = (i8_arr_gsum_level(ioffset+ilevel) - IX_8) + enddo + +! add in contributions, smaller to larger, rescaling for each +! addition to guarantee that exponent of working summand is always +! larger than minexponent + do while (jlevel > 0) + if (first) then + curr_exp = LX(jlevel) + arr_shift + arr_gsum(ifld) = fraction(X_8(jlevel)) + first = .false. + else + corr_exp = curr_exp - (LX(jlevel) + arr_shift) + arr_gsum(ifld) = fraction(X_8(jlevel)) & + + scale(arr_gsum(ifld),corr_exp) + curr_exp = LX(jlevel) + arr_shift + endif + jlevel = jlevel - 1 + enddo + + endif + + arr_shift = arr_shift + arr_max_shift + enddo + +! apply final exponent correction, scaling first if exponent is too small +! to apply directly + corr_exp = curr_exp + exponent(arr_gsum(ifld)) + if (corr_exp .ge. MINEXPONENT(1._r8)) then + arr_gsum(ifld) = set_exponent(arr_gsum(ifld),corr_exp) + else + RX_8 = set_exponent(arr_gsum(ifld), & + corr_exp-MINEXPONENT(1._r8)) + arr_gsum(ifld) = scale(RX_8,MINEXPONENT(1._r8)) + endif + +! if validate is .true. and some precision lost, test whether 'too much' +! was lost, due to too loose an upper bound, too stringent a limit on number +! of levels of expansion, cancellation, .... Calculated by comparing lower +! bound on number of sigificant digits with number of digits in 1.0_r8 . + if (validate) then + if (i8_arr_gsum_level(ioffset-voffset+2) .ne. 0_i8) then + +! find first nonzero level and use exponent for this level, then assume all +! subsequent levels contribute arr_max_shift digits. + sum_digits = 0 + do ilevel=0,max_levels(ifld) + if (sum_digits .eq. 0) then + if (i8_arr_gsum_level(ioffset+ilevel) .ne. 0_i8) then + X_8(1) = i8_arr_gsum_level(ioffset+ilevel) + LX(1) = exponent(X_8(1)) + sum_digits = LX(1) + endif + else + sum_digits = sum_digits + arr_max_shift + endif + enddo + + if (sum_digits < digits(1.0_r8)) then + recompute = .true. + endif + endif + endif + + endif + + enddo + + + end subroutine ice_reprosum_int + +!======================================================================== +! +! Purpose: +!> Test whether distributed sum exceeds tolerance and print out a +!> warning message. +! +!---------------------------------------------------------------------- + + logical function ice_reprosum_tolExceeded (name, nflds, master, & + logunit, rel_diff ) +!---------------------------------------------------------------------- + +! Arguments + + character(len=*), intent(in) :: name !< distributed sum identifier + integer, intent(in) :: nflds !< number of fields + logical, intent(in) :: master !< process that will write + !< warning messages? + integer, optional, intent(in) :: logunit!< unit warning messages + !< written to + real(r8), intent(in) :: rel_diff(2,nflds) + !< relative and absolute + !< differences between fixed + !< and floating point sums + +! Local workspace + + integer :: llogunit ! local log unit + integer :: ifld ! field index + integer :: exceeds_limit ! number of fields whose + ! sum exceeds tolerance + real(r8) :: max_rel_diff ! maximum relative difference + integer :: max_rel_diff_idx ! field index for max. rel. diff. + real(r8) :: max_abs_diff ! maximum absolute difference + integer :: max_abs_diff_idx ! field index for max. abs. diff. + character(len=*),parameter :: subname = '(ice_reprosum_tolExceeded)' + +!----------------------------------------------------------------------- + + ice_reprosum_tolExceeded = .false. + if (ice_reprosum_reldiffmax < 0.0_r8) return + + if ( present(logunit) ) then + llogunit = logunit + else + llogunit = nu_diag + endif + + ! check that "fast" reproducible sum is accurate enough. + exceeds_limit = 0 + max_rel_diff = 0.0_r8 + max_abs_diff = 0.0_r8 + do ifld=1,nflds + if (rel_diff(1,ifld) > ice_reprosum_reldiffmax) then + exceeds_limit = exceeds_limit + 1 + if (rel_diff(1,ifld) > max_rel_diff) then + max_rel_diff = rel_diff(1,ifld) + max_rel_diff_idx = ifld + endif + if (rel_diff(2,ifld) > max_abs_diff) then + max_abs_diff = rel_diff(2,ifld) + max_abs_diff_idx = ifld + endif + endif + enddo + + if (exceeds_limit > 0) then + if (master) then + write(llogunit,*) subname,trim(name), & + ': difference in fixed and floating point sums ', & + ' exceeds tolerance in ', exceeds_limit, & + ' fields.' + write(llogunit,*) subname,' Maximum relative diff: (rel)', & + rel_diff(1,max_rel_diff_idx), ' (abs) ', & + rel_diff(2,max_rel_diff_idx) + write(llogunit,*) subname,' Maximum absolute diff: (rel)', & + rel_diff(1,max_abs_diff_idx), ' (abs) ', & + rel_diff(2,max_abs_diff_idx) + endif + ice_reprosum_tolExceeded = .true. + endif + + + end function ice_reprosum_tolExceeded + +!======================================================================== +! +! Purpose: +!> Compute the global sum of each field in "arr" using the indicated +!> communicator with a reproducible yet scalable implementation based +!> on He and Ding's implementation of the double-double algorithm. +! +!---------------------------------------------------------------------- + + subroutine ice_reprosum_ddpdd (arr, arr_gsum, nsummands, dsummands, & + nflds, mpi_comm ) +!---------------------------------------------------------------------- + +! Arguments + + integer, intent(in) :: nsummands !< number of local summands + integer, intent(in) :: dsummands !< declared first dimension + integer, intent(in) :: nflds !< number of fields + real(r8), intent(in) :: arr(dsummands,nflds) + !< input array + integer, intent(in) :: mpi_comm !< MPI subcommunicator + + real(r8), intent(out):: arr_gsum(nflds) + !< global sums + + +! Local workspace + + integer :: old_cw ! for x86 processors, save + ! current arithmetic mode + integer :: ifld, isum ! loop variables + integer :: ierr ! MPI error return + + real(r8) :: e, t1, t2 ! temporaries + complex(r8) :: arr_lsum_dd(nflds) ! local sums (in double-double + ! format) + complex(r8) :: arr_gsum_dd(nflds) ! global sums (in double-double + ! format) + + integer, save :: mpi_sumdd + logical, save :: first_time = .true. + character(len=*),parameter :: subname = '(ice_reprosum_ddpdd)' + +!----------------------------------------------------------------------- + + call ice_shr_reprosumx86_fix_start (old_cw) + + if (first_time) then +#ifdef SERIAL_REMOVE_MPI + mpi_sumdd = 0 +#else + call mpi_op_create(ddpdd, .true., mpi_sumdd, ierr) +#endif + first_time = .false. + endif + + do ifld=1,nflds + arr_lsum_dd(ifld) = (0.0_r8,0.0_r8) + + do isum=1,nsummands + + ! Compute arr(isum,ifld) + arr_lsum_dd(ifld) using Knuth''s + ! trick. + t1 = arr(isum,ifld) + real(arr_lsum_dd(ifld)) + e = t1 - arr(isum,ifld) + t2 = ((real(arr_lsum_dd(ifld)) - e) & + + (arr(isum,ifld) - (t1 - e))) & + + aimag(arr_lsum_dd(ifld)) + + ! The result is t1 + t2, after normalization. + arr_lsum_dd(ifld) = cmplx ( t1 + t2, t2 - ((t1 + t2) - t1), r8 ) + enddo + + enddo + +#ifdef SERIAL_REMOVE_MPI + arr_gsum_dd = arr_lsum_dd +#else + call mpi_allreduce (arr_lsum_dd, arr_gsum_dd, nflds, & + MPI_COMPLEX16, mpi_sumdd, mpi_comm, ierr) +#endif + do ifld=1,nflds + arr_gsum(ifld) = real(arr_gsum_dd(ifld)) + enddo + + call ice_shr_reprosumx86_fix_end (old_cw) + + end subroutine ice_reprosum_ddpdd + +!----------------------------------------------------------------------- + + subroutine DDPDD (dda, ddb, len, itype) +!---------------------------------------------------------------------- +! +! Purpose: +! Modification of original codes written by David H. Bailey +! This subroutine computes ddb(i) = dda(i)+ddb(i) +! +!---------------------------------------------------------------------- + +! Arguments + + integer, intent(in) :: len ! array length + complex(r8), intent(in) :: dda(len) ! input + complex(r8), intent(inout) :: ddb(len) ! result + integer, intent(in) :: itype ! unused + +! Local workspace + + real(r8) e, t1, t2 + integer i + character(len=*),parameter :: subname = '(ice_reprosum_mod:DDPDD)' + +!----------------------------------------------------------------------- + + do i = 1, len +! Compute dda + ddb using Knuth's trick. + t1 = real(dda(i)) + real(ddb(i)) + e = t1 - real(dda(i)) + t2 = ((real(ddb(i)) - e) + (real(dda(i)) - (t1 - e))) & + + aimag(dda(i)) + aimag(ddb(i)) + +! The result is t1 + t2, after normalization. + ddb(i) = cmplx ( t1 + t2, t2 - ((t1 + t2) - t1), r8 ) + enddo + + + end subroutine DDPDD + +!----------------------------------------------------------------------- + + subroutine split_indices(total,num_pieces,ibeg,iend) +!---------------------------------------------------------------------- +! +! Purpose: +! Split range into 'num_pieces' +! +!---------------------------------------------------------------------- + +! Arguments + + integer, intent(in) :: total + integer, intent(in) :: num_pieces + integer, intent(out) :: ibeg(num_pieces), iend(num_pieces) + +! Local workspace + + integer :: itmp1, itmp2, ioffset, i + character(len=*),parameter :: subname = '(ice_reprosum_mod:split_indices)' + +!----------------------------------------------------------------------- + + itmp1 = total/num_pieces + itmp2 = mod(total,num_pieces) + ioffset = 0 + do i=1,itmp2 + ibeg(i) = ioffset + 1 + iend(i) = ioffset + (itmp1+1) + ioffset = iend(i) + enddo + do i=itmp2+1,num_pieces + ibeg(i) = ioffset + 1 + if (ibeg(i) > total) then + iend(i) = ibeg(i) - 1 + else + iend(i) = ioffset + itmp1 + ioffset = iend(i) + endif + enddo + + end subroutine split_indices + +!======================================================================== + +end module ice_reprosum diff --git a/cicecore/cicedynB/infrastructure/ice_domain.F90 b/cicecore/cicedynB/infrastructure/ice_domain.F90 index 236f6f179..b4b4c4ab2 100644 --- a/cicecore/cicedynB/infrastructure/ice_domain.F90 +++ b/cicecore/cicedynB/infrastructure/ice_domain.F90 @@ -478,12 +478,12 @@ subroutine init_domain_distribution(KMTG,ULATG) if (this_block%j_glob(j) > 0) then do i=this_block%ilo,this_block%ihi if (this_block%i_glob(i) > 0) then - ig = this_block%i_glob(i) + ig = this_block%i_glob(i) jg = this_block%j_glob(j) if (KMTG(ig,jg) > puny .and. & (ULATG(ig,jg) < shlat/rad_to_deg .or. & ULATG(ig,jg) > nhlat/rad_to_deg) ) & - nocn(n) = nocn(n) + flat(ig,jg) + nocn(n) = nocn(n) + flat(ig,jg) endif end do endif diff --git a/cicecore/cicedynB/infrastructure/ice_shr_reprosum86.c b/cicecore/cicedynB/infrastructure/ice_shr_reprosum86.c new file mode 100644 index 000000000..8bae08853 --- /dev/null +++ b/cicecore/cicedynB/infrastructure/ice_shr_reprosum86.c @@ -0,0 +1,83 @@ +/* + * src/x86.c + * + * This work was supported by the Director, Office of Science, Division + * of Mathematical, Information, and Computational Sciences of the + * U.S. Department of Energy under contract number DE-AC03-76SF00098. + * + * Copyright (c) 2000-2001 + * + * Contains functions to set and restore the round-to-double flag in the + * control word of a x86 FPU. + */ + +#define _NO_CHANGE 0 +#define _UPPER_CASE 1 +#define _ADD_UNDERSCORE 2 +#define _ADD_TWO_UNDERSCORES 3 + +#ifdef FORTRANUNDERSCORE +#define NAMING _ADD_UNDERSCORE +#endif + +#ifdef FORTRANDOUBLEUNDERSCORE +#define NAMING _ADD_TWO_UNDERSCORES +#endif + +#ifdef FORTRANCAPS +#define NAMING _UPPER_CASE +#endif + +#ifndef NAMING +#define NAMING _NO_CHANGE +#endif + +#if (NAMING == _ADD_UNDERSCORE) +#define ice_shr_reprosumx86_fix_start ice_shr_reprosumx86_fix_start_ +#define ice_shr_reprosumx86_fix_end ice_shr_reprosumx86_fix_end_ +#endif + +#if (NAMING == _ADD_TWO_UNDERSCORES) +#define ice_shr_reprosumx86_fix_start ice_shr_reprosumx86_fix_start__ +#define ice_shr_reprosumx86_fix_end ice_shr_reprosumx86_fix_end__ +#endif + +#if (NAMING == _UPPER_CASE) +#define ice_shr_reprosumx86_fix_start ICE_SHR_REPROSUMX86_FIX_START +#define ice_shr_reprosumx86_fix_end ICE_SHR_REPROSUMX86_FIX_END +#endif + +#ifdef x86 +#ifndef _FPU_GETCW +#define _FPU_GETCW(x) asm volatile ("fnstcw %0":"=m" (x)); +#endif + +#ifndef _FPU_SETCW +#define _FPU_SETCW(x) asm volatile ("fldcw %0": :"m" (x)); +#endif + +#ifndef _FPU_EXTENDED +#define _FPU_EXTENDED 0x0300 +#endif + +#ifndef _FPU_DOUBLE +#define _FPU_DOUBLE 0x0200 +#endif +#endif /* x86 */ + +void ice_shr_reprosumx86_fix_start(unsigned short *old_cw) { +#ifdef x86 + unsigned short new_cw; + + _FPU_GETCW(*old_cw); + new_cw = (*old_cw & ~_FPU_EXTENDED) | _FPU_DOUBLE; + _FPU_SETCW(new_cw); +#endif +} + +void ice_shr_reprosumx86_fix_end(unsigned short *old_cw) { +#ifdef x86 + _FPU_SETCW(*old_cw); +#endif +} + diff --git a/cicecore/drivers/cesm/CICE_copyright.txt b/cicecore/drivers/cesm/CICE_copyright.txt index 7564fa80f..959a3ce32 100644 --- a/cicecore/drivers/cesm/CICE_copyright.txt +++ b/cicecore/drivers/cesm/CICE_copyright.txt @@ -1,7 +1,7 @@ -! Copyright (c) 2018, Triad National Security, LLC +! Copyright (c) 2019, Triad National Security, LLC ! All rights reserved. ! -! Copyright 2018. Triad National Security, LLC. This software was +! Copyright 2019. Triad National Security, LLC. This software was ! produced under U.S. Government contract DE-AC52-06NA25396 for Los ! Alamos National Laboratory (LANL), which is operated by Triad ! National Security, LLC for the U.S. Department of Energy. The U.S. diff --git a/cicecore/drivers/cice/CICE.F90 b/cicecore/drivers/cice/CICE.F90 index 88ee77438..ec850b128 100644 --- a/cicecore/drivers/cice/CICE.F90 +++ b/cicecore/drivers/cice/CICE.F90 @@ -1,8 +1,8 @@ !======================================================================= -! Copyright (c) 2018, Triad National Security, LLC +! Copyright (c) 2019, Triad National Security, LLC ! All rights reserved. ! -! Copyright 2018. Triad National Security, LLC. This software was +! Copyright 2019. Triad National Security, LLC. This software was ! produced under U.S. Government contract DE-AC52-06NA25396 for Los ! Alamos National Laboratory (LANL), which is operated by Triad ! National Security, LLC for the U.S. Department of Energy. The U.S. diff --git a/cicecore/drivers/hadgem3/CICE.F90 b/cicecore/drivers/hadgem3/CICE.F90 index 40eee5676..ccc9177fb 100644 --- a/cicecore/drivers/hadgem3/CICE.F90 +++ b/cicecore/drivers/hadgem3/CICE.F90 @@ -1,8 +1,8 @@ !======================================================================= -! Copyright (c) 2018, Triad National Security, LLC +! Copyright (c) 2019, Triad National Security, LLC ! All rights reserved. ! -! Copyright 2018. Triad National Security, LLC. This software was +! Copyright 2019. Triad National Security, LLC. This software was ! produced under U.S. Government contract DE-AC52-06NA25396 for Los ! Alamos National Laboratory (LANL), which is operated by Triad ! National Security, LLC for the U.S. Department of Energy. The U.S. diff --git a/cicecore/shared/ice_arrays_column.F90 b/cicecore/shared/ice_arrays_column.F90 index c430ddd13..71d03ed94 100644 --- a/cicecore/shared/ice_arrays_column.F90 +++ b/cicecore/shared/ice_arrays_column.F90 @@ -268,7 +268,7 @@ module ice_arrays_column real (kind=dbl_kind), dimension(:), allocatable, public :: & R_C2N , & ! algal C to N (mole/mole) R_chl2N , & ! 3 algal chlorophyll to N (mg/mmol) - R_Si2N ! silica to nitrogen mole ratio for algal groups + R_Si2N ! silica to nitrogen mole ratio for algal groups !======================================================================= diff --git a/cicecore/shared/ice_calendar.F90 b/cicecore/shared/ice_calendar.F90 index 85b827038..a767bdfd7 100644 --- a/cicecore/shared/ice_calendar.F90 +++ b/cicecore/shared/ice_calendar.F90 @@ -272,6 +272,11 @@ subroutine calendar(ttime) endif enddo + if (dumpfreq == '1') then + if (mod(istep1, dumpfreq_n)==0) & + write_restart = 1 + endif + if (istep > 1) then do ns = 1, nstreams diff --git a/cicecore/shared/ice_constants.F90 b/cicecore/shared/ice_constants.F90 index 15581e787..c49732e35 100644 --- a/cicecore/shared/ice_constants.F90 +++ b/cicecore/shared/ice_constants.F90 @@ -60,7 +60,7 @@ module ice_constants c180 = 180.0_dbl_kind, & c360 = 360.0_dbl_kind, & c365 = 365.0_dbl_kind, & - c400 = 400.0_dbl_kind, & + c400 = 400.0_dbl_kind, & c1000= 1000.0_dbl_kind, & c3600= 3600.0_dbl_kind, & p001 = 0.001_dbl_kind, & diff --git a/cicecore/shared/ice_fileunits.F90 b/cicecore/shared/ice_fileunits.F90 index 24ba5c6b2..2620aa499 100644 --- a/cicecore/shared/ice_fileunits.F90 +++ b/cicecore/shared/ice_fileunits.F90 @@ -34,8 +34,8 @@ module ice_fileunits character (len=char_len), public :: & diag_type ! 'stdout' or 'file' - logical (log_kind), public :: & - bfbflag ! logical for bit-for-bit computations + character (len=char_len), public :: & + bfbflag ! method for bit-for-bit computations integer (kind=int_kind), public :: & nu_grid , & ! grid file diff --git a/cicecore/shared/ice_kinds_mod.F90 b/cicecore/shared/ice_kinds_mod.F90 index 4124f2594..769bb087f 100644 --- a/cicecore/shared/ice_kinds_mod.F90 +++ b/cicecore/shared/ice_kinds_mod.F90 @@ -14,6 +14,7 @@ module ice_kinds_mod use icepack_intfc, only: char_len_long => icepack_char_len_long use icepack_intfc, only: log_kind => icepack_log_kind use icepack_intfc, only: int_kind => icepack_int_kind + use icepack_intfc, only: int8_kind => icepack_int8_kind use icepack_intfc, only: real_kind => icepack_real_kind use icepack_intfc, only: dbl_kind => icepack_dbl_kind use icepack_intfc, only: r16_kind => icepack_r16_kind diff --git a/cicecore/shared/ice_spacecurve.F90 b/cicecore/shared/ice_spacecurve.F90 index ed108e1c7..78b256b8f 100644 --- a/cicecore/shared/ice_spacecurve.F90 +++ b/cicecore/shared/ice_spacecurve.F90 @@ -31,7 +31,7 @@ module ice_spacecurve ! !PUBLIC MEMBER FUNCTIONS: public :: GenSpaceCurve, & - IsLoadBalanced + IsLoadBalanced public :: Factor, & IsFactorable, & @@ -41,24 +41,24 @@ module ice_spacecurve ! !PRIVATE MEMBER FUNCTIONS: - private :: map, & - PeanoM, & - Hilbert, & - Cinco, & + private :: map, & + PeanoM, & + Hilbert, & + Cinco, & GenCurve private :: FirstFactor, & FindandMark integer(int_kind), dimension(:,:), allocatable :: & - dir, &! direction to move along each level + dir, &! direction to move along each level ordered ! the ordering integer(int_kind), dimension(:), allocatable :: & - pos ! position along each of the axes + pos ! position along each of the axes integer(int_kind) :: & - maxdim, &! dimensionality of entire space - vcnt ! visitation count + maxdim, &! dimensionality of entire space + vcnt ! visitation count logical :: verbose=.FALSE. @@ -80,7 +80,7 @@ recursive function Cinco(l,type,ma,md,ja,jd) result(ierr) ! !DESCRIPTION: ! This subroutine implements a Cinco space-filling curve. ! Cinco curves connect a Nb x Nb block of points where -! +! ! Nb = 5^p ! ! !REVISION HISTORY: @@ -91,12 +91,12 @@ recursive function Cinco(l,type,ma,md,ja,jd) result(ierr) ! !INPUT PARAMETERS integer(int_kind), intent(in) :: & - l, & ! level of the space-filling curve + l, & ! level of the space-filling curve type, & ! type of SFC curve - ma, & ! Major axis [0,1] - md, & ! direction of major axis [-1,1] - ja, & ! joiner axis [0,1] - jd ! direction of joiner axis [-1,1] + ma, & ! Major axis [0,1] + md, & ! direction of major axis [-1,1] + ja, & ! joiner axis [0,1] + jd ! direction of joiner axis [-1,1] ! !OUTPUT PARAMETERS @@ -111,12 +111,12 @@ recursive function Cinco(l,type,ma,md,ja,jd) result(ierr) !----------------------------------------------------------------------- integer(int_kind) :: & - lma, &! local major axis (next level) - lmd, &! local major direction (next level) - lja, &! local joiner axis (next level) - ljd, &! local joiner direction (next level) - ltype, &! type of SFC on next level - ll ! next level down + lma, &! local major axis (next level) + lmd, &! local major direction (next level) + lja, &! local joiner axis (next level) + ljd, &! local joiner direction (next level) + ltype, &! type of SFC on next level + ll ! next level down logical :: debug = .FALSE. @@ -963,8 +963,8 @@ function IncrementCurve(ja,jd) result(ierr) ! !INPUT PARAMETERS: integer(int_kind) :: & - ja, &! axis to increment - jd ! direction along axis + ja, &! axis to increment + jd ! direction along axis ! !OUTPUT PARAMETERS: integer(int_kind) :: ierr ! error return code @@ -975,7 +975,7 @@ function IncrementCurve(ja,jd) result(ierr) ! mark the newly visited point !----------------------------- ordered(pos(0)+1,pos(1)+1) = vcnt - + !------------------------------------ ! increment curve and update position !------------------------------------ @@ -1064,12 +1064,12 @@ function IsLoadBalanced(nelem,npart) ! !INTPUT PARAMETERS: integer(int_kind), intent(in) :: & - nelem, & ! number of blocks/elements to partition - npart ! size of partition + nelem, & ! number of blocks/elements to partition + npart ! size of partition ! !OUTPUT PARAMETERS: logical :: IsLoadBalanced ! .TRUE. if a perfectly load balanced - ! partition is possible + ! partition is possible !EOP !BOC !----------------------------------------------------------------------- @@ -1077,7 +1077,7 @@ function IsLoadBalanced(nelem,npart) ! local variables ! !----------------------------------------------------------------------- - + integer(int_kind) :: tmp1 ! temporary int character(len=*),parameter :: subname='(IsLoadBalanced)' @@ -1085,10 +1085,10 @@ function IsLoadBalanced(nelem,npart) !----------------------------------------------------------------------- tmp1 = nelem/npart - if(npart*tmp1 == nelem ) then - IsLoadBalanced=.TRUE. + if (npart*tmp1 == nelem ) then + IsLoadBalanced=.TRUE. else - IsLoadBalanced=.FALSE. + IsLoadBalanced=.FALSE. endif !EOP @@ -1285,7 +1285,7 @@ function Factor(num) result(res) !----------------------------------------------------------------------- integer(int_kind) :: & - tmp,tmp2,tmp3,tmp5 ! tempories for the factorization algorithm + tmp,tmp2,tmp3,tmp5 ! tempories for the factorization algorithm integer(int_kind) :: i,n ! loop tempories logical :: found ! logical temporary character(len=*),parameter :: subname='(Factor)' @@ -1437,9 +1437,9 @@ subroutine map(l) !----------------------------------------------------------------------- integer(int_kind) :: & - d, & ! dimension of curve only 2D is supported - type, & ! type of space-filling curve to start off - ierr ! error return code + d, & ! dimension of curve only 2D is supported + type, & ! type of space-filling curve to start off + ierr ! error return code character(len=*),parameter :: subname='(map)' d = SIZE(pos) @@ -1484,8 +1484,8 @@ subroutine PrintCurve(Mesh) ! !----------------------------------------------------------------------- integer(int_kind) :: & - gridsize, &! order of space-filling curve - i ! loop temporary + gridsize, &! order of space-filling curve + i ! loop temporary character(len=*),parameter :: subname='(PrintCurve)' !----------------------------------------------------------------------- @@ -1523,7 +1523,7 @@ subroutine PrintCurve(Mesh) write (*,*) "------------------------------------------" do i=1,gridsize write(*,6) Mesh(1,i),Mesh(2,i),Mesh(3,i), & - Mesh(4,i),Mesh(5,i),Mesh(6,i) + Mesh(4,i),Mesh(5,i),Mesh(6,i) enddo else if(gridsize == 8) then write (*,*) "A Level 3 Hilbert Curve:" @@ -1615,7 +1615,7 @@ subroutine PrintCurve(Mesh) Mesh(13,i),Mesh(14,i),Mesh(15,i),Mesh(16,i), & Mesh(17,i),Mesh(18,i),Mesh(19,i),Mesh(20,i), & Mesh(21,i),Mesh(22,i),Mesh(23,i),Mesh(24,i), & - Mesh(25,i) + Mesh(25,i) enddo else if(gridsize == 27) then write (*,*) "A Level 3 Peano Meandering Curve:" @@ -1683,7 +1683,7 @@ subroutine GenSpaceCurve(Mesh) ! !INPUT/OUTPUT PARAMETERS: integer(int_kind), target,intent(inout) :: & - Mesh(:,:) ! The SFC ordering in 2D array + Mesh(:,:) ! The SFC ordering in 2D array !EOP !BOC @@ -1694,8 +1694,8 @@ subroutine GenSpaceCurve(Mesh) !----------------------------------------------------------------------- integer(int_kind) :: & - level, &! Level of space-filling curve - dim ! dimension of SFC... currently limited to 2D + level, &! Level of space-filling curve + dim ! dimension of SFC... currently limited to 2D integer(int_kind) :: gridsize ! number of points on a side diff --git a/cicecore/version.txt b/cicecore/version.txt index fb024cb80..de34d9d31 100644 --- a/cicecore/version.txt +++ b/cicecore/version.txt @@ -1 +1 @@ -CICE 6.0.0 +CICE 6.0.1 diff --git a/configuration/scripts/Makefile b/configuration/scripts/Makefile index a346cf37e..362397b1d 100644 --- a/configuration/scripts/Makefile +++ b/configuration/scripts/Makefile @@ -1,9 +1,5 @@ #------------------------------------------------------------------------------- -# CVS $Id: Makefile.std,v 1.1 2004/02/09 18:13:52 lipscomb Exp $ -# CVS $Source: /home/climate/CVS-COSIM/cice/bld/Makefile.std,v $ -# CVS $Name: $ -#------------------------------------------------------------------------------- -# Common Makefile: a framework for building all CCSM components and more +# Common Makefile for CICE # # Command-line variables # MACFILE= ~ the macros definition file to use/include @@ -20,10 +16,8 @@ # MACFILE, used to trigger special compilation flags # # Usage examples: -# % gmake MACFILE=Macros.AIX VPFILE=Filepath MODEL=ccm3 EXEC=atm -# % gmake MACFILE=Macros.AIX VPFILE=Filepath SRCFILE=Srclist EXEC=pop -# % gmake MACFILE=Macros.C90 VPATH="dir1 dir2" SRCS="file1.c file2.F90" -# % gmake MACFILE=Macros.SUN SRCS="test.F" +# % gmake -j 8 VPFILE=Filepath EXEC=${ICE_RUNDIR}/cice \ +# -f ${ICE_CASEDIR}/Makefile MACFILE=${ICE_CASEDIR}/Macros.conrad_intel #------------------------------------------------------------------------------- #------------------------------------------------------------------------------- @@ -98,10 +92,22 @@ db_files: db_flags: @echo " " @echo "* cpp := $(CPP) $(CPPFLAGS) $(CPPDEFS) $(INCS) $(INCLDIR)" - @echo "* cc := cc -c $(CFLAGS) $(INCS) $(INCLDIR)" + @echo "* cc := $(CC) -c $(CFLAGS) $(INCS) $(INCLDIR)" @echo "* .F.o := $(FC) -c $(FFLAGS) $(FIXEDFLAGS) $(INCS) $(INCLDIR)" @echo "* .F90.o := $(FC) -c $(FFLAGS) $(FREEFLAGS) $(INCS) $(INCLDIR)" +#------------------------------------------------------------------------------- +# build rule for makdep: MACFILE, cmd-line, or env vars must provide +# the needed macros +#------------------------------------------------------------------------------- + +ifndef $(CFLAGS_HOST) + CFLAGS_HOST := +endif + +$(DEPGEN): $(ICE_CASEDIR)/makdep.c + $(SCC) -o $@ $(CFLAGS_HOST) $< + #------------------------------------------------------------------------------- # build rules: MACFILE, cmd-line, or env vars must provide the needed macros #------------------------------------------------------------------------------- @@ -110,7 +116,7 @@ $(EXEC): $(OBJS) $(LD) -o $(EXEC) $(LDFLAGS) $(OBJS) $(ULIBS) $(SLIBS) .c.o: - cc $(CFLAGS) $(CPPDEFS) $(INCS) $(INCLDIR) $< + $(CC) $(CFLAGS) $(CPPDEFS) $(INCS) $(INCLDIR) $< .F.o: $(FC) -c $(FFLAGS) $(FIXEDFLAGS) $(CPPDEFS) $(INCS) $(INCLDIR) $< diff --git a/configuration/scripts/cice.batch.csh b/configuration/scripts/cice.batch.csh index 2efa2fe7f..0647c5813 100755 --- a/configuration/scripts/cice.batch.csh +++ b/configuration/scripts/cice.batch.csh @@ -18,6 +18,7 @@ set acct = ${ICE_ACCOUNT} @ ncores = ${ntasks} * ${nthrds} @ taskpernode = ${maxtpn} / $nthrds +if (${taskpernode} == 0) set taskpernode = 1 @ nnodes = ${ntasks} / ${taskpernode} if (${nnodes} * ${taskpernode} < ${ntasks}) @ nnodes = $nnodes + 1 set taskpernodelimit = ${taskpernode} @@ -27,17 +28,24 @@ if (${taskpernodelimit} > ${ntasks}) set taskpernodelimit = ${ntasks} set ptile = $taskpernode if ($ptile > ${maxtpn} / 2) @ ptile = ${maxtpn} / 2 +set runlength = ${ICE_RUNLENGTH} +if ($?ICE_MACHINE_MAXRUNLENGTH) then + if (${runlength} > ${ICE_MACHINE_MAXRUNLENGTH}) then + set runlength = ${ICE_MACHINE_MAXRUNLENGTH} + endif +endif + set queue = "${ICE_QUEUE}" set batchtime = "00:15:00" -if (${ICE_RUNLENGTH} > 0) set batchtime = "00:29:00" -if (${ICE_RUNLENGTH} == 1) set batchtime = "00:59:00" -if (${ICE_RUNLENGTH} == 2) set batchtime = "2:00:00" -if (${ICE_RUNLENGTH} == 3) set batchtime = "3:00:00" -if (${ICE_RUNLENGTH} == 4) set batchtime = "4:00:00" -if (${ICE_RUNLENGTH} == 5) set batchtime = "5:00:00" -if (${ICE_RUNLENGTH} == 6) set batchtime = "6:00:00" -if (${ICE_RUNLENGTH} == 7) set batchtime = "7:00:00" -if (${ICE_RUNLENGTH} >= 8) set batchtime = "8:00:00" +if (${runlength} == 0) set batchtime = "00:29:00" +if (${runlength} == 1) set batchtime = "00:59:00" +if (${runlength} == 2) set batchtime = "2:00:00" +if (${runlength} == 3) set batchtime = "3:00:00" +if (${runlength} == 4) set batchtime = "4:00:00" +if (${runlength} == 5) set batchtime = "5:00:00" +if (${runlength} == 6) set batchtime = "6:00:00" +if (${runlength} == 7) set batchtime = "7:00:00" +if (${runlength} >= 8) set batchtime = "8:00:00" set shortcase = `echo ${ICE_CASENAME} | cut -c1-15` @@ -96,14 +104,16 @@ cat >> ${jobfile} << EOFB EOFB else if (${ICE_MACHINE} =~ cori*) then +@ nthrds2 = ${nthrds} * 2 cat >> ${jobfile} << EOFB #SBATCH -J ${ICE_CASENAME} -#SBATCH -p ${queue} ###SBATCH -A ${acct} -#SBATCH -n ${ncores} -#SBATCH -t ${batchtime} -#SBATCH -L SCRATCH -#SBATCH -C haswell +#SBATCH --qos ${queue} +#SBATCH --time ${batchtime} +#SBATCH --nodes ${nnodes} +#SBATCH --ntasks ${ntasks} +#SBATCH --cpus-per-task ${nthrds2} +#SBATCH --constraint haswell ###SBATCH -e filename ###SBATCH -o filename ###SBATCH --mail-type FAIL @@ -149,6 +159,55 @@ cat >> ${jobfile} << EOFB #SBATCH --qos=standby EOFB +else if (${ICE_MACHINE} =~ millikan*) then +cat >> ${jobfile} << EOFB +#SBATCH -J ${ICE_CASENAME} +#SBATCH -t ${batchtime} +#SBATCH -A ${acct} +#SBATCH -N ${nnodes} +#SBATCH -e slurm%j.err +#SBATCH -o slurm%j.out +###SBATCH --mail-type END,FAIL +###SBATCH --mail-user=amelie.bouchat@canada.ca +#SBATCH --qos=standby +EOFB + +else if (${ICE_MACHINE} =~ brooks*) then +cat >> ${jobfile} << EOFB +#PBS -N ${ICE_CASENAME} +#PBS -j oe +#PBS -l select=${nnodes}:ncpus=${corespernode}:mpiprocs=${taskpernodelimit}:ompthreads=${nthrds} +#PBS -l walltime=${batchtime} +EOFB + +else if (${ICE_MACHINE} =~ theia*) then +cat >> ${jobfile} << EOFB +#SBATCH -J ${ICE_CASENAME} +#SBATCH -t ${batchtime} +#SBATCH -q batch +#SBATCH -A marine-cpu +#SBATCH -N ${nnodes} +#SBATCH -e slurm%j.err +#SBATCH -o slurm%j.out +#SBATCH --mail-type END,FAIL +#SBATCH --mail-user=robert.grumbine@noaa.gov +EOFB + +else if (${ICE_MACHINE} =~ phase2*) then +cat >> ${jobfile} << EOFB +# nothing to do +EOFB + +else if (${ICE_MACHINE} =~ phase3*) then +cat >> ${jobfile} << EOFB +# nothing to do +EOFB + +else if (${ICE_MACHINE} =~ high_Sierra*) then +cat >> ${jobfile} << EOFB +# nothing to do +EOFB + else if (${ICE_MACHINE} =~ testmachine*) then cat >> ${jobfile} << EOFB # nothing to do diff --git a/configuration/scripts/cice.build b/configuration/scripts/cice.build index e7a9e42a9..62ea6c447 100755 --- a/configuration/scripts/cice.build +++ b/configuration/scripts/cice.build @@ -54,15 +54,8 @@ endif if !(-d ${ICE_OBJDIR}) mkdir -p ${ICE_OBJDIR} cd ${ICE_OBJDIR} -#setenv ICE_CPPDEFS "-DNXGLOB=${ICE_NXGLOB} -DNYGLOB=${ICE_NYGLOB} -DBLCKX=${ICE_BLCKX} -DBLCKY=${ICE_BLCKY} -DMXBLCKS=${ICE_MXBLCKS} -DNICELYR=${NICELYR} -DNSNWLYR=${NSNWLYR} -DNICECAT=${NICECAT} -DTRAGE=${TRAGE} -DTRFY=${TRFY} -DTRLVL=${TRLVL} -DTRPND=${TRPND} -DTRBRI=${TRBRI} -DNTRAERO=${NTRAERO} -DTRZS=${TRZS} -DNBGCLYR=${NBGCLYR} -DTRALG=${TRALG} -DTRBGCZ=${TRBGCZ} -DTRDOC=${TRDOC} -DTRDOC=${TRDOC} -DTRDIC=${TRDIC} -DTRDON=${TRDON} -DTRFED=${TRFED} -DTRFEP=${TRFEP} -DTRZAERO=${TRZAERO} -DTRBGCS=${TRBGCS} -DNUMIN=${NUMIN} -DNUMAX=${NUMAX}" -#setenv ICE_CPPDEFS "-DNICELYR=${NICELYR} -DNSNWLYR=${NSNWLYR} -DNICECAT=${NICECAT} -DTRAGE=${TRAGE} -DTRFY=${TRFY} -DTRLVL=${TRLVL} -DTRPND=${TRPND} -DTRBRI=${TRBRI} -DNTRAERO=${NTRAERO} -DTRZS=${TRZS} -DNBGCLYR=${NBGCLYR} -DTRALG=${TRALG} -DTRBGCZ=${TRBGCZ} -DTRDOC=${TRDOC} -DTRDOC=${TRDOC} -DTRDIC=${TRDIC} -DTRDON=${TRDON} -DTRFED=${TRFED} -DTRFEP=${TRFEP} -DTRZAERO=${TRZAERO} -DTRBGCS=${TRBGCS} -DNUMIN=${NUMIN} -DNUMAX=${NUMAX}" -#setenv ICE_CPPDEFS "-DNUMIN=${NUMIN} -DNUMAX=${NUMAX}" setenv ICE_CPPDEFS " " -if ($DITTO == 'yes') then - setenv ICE_CPPDEFS "${ICE_CPPDEFS} -DREPRODUCIBLE" -endif - if (${ICE_IOTYPE} == 'netcdf') then set IODIR = io_netcdf setenv ICE_CPPDEFS "${ICE_CPPDEFS} -Dncdf" @@ -96,7 +89,8 @@ cat ${ICE_OBJDIR}/Filepath echo " " echo "building makdep" -cc -o makdep ${ICE_CASEDIR}/makdep.c || exit 2 +${ICE_MACHINE_MAKE} \ + -f ${ICE_CASEDIR}/Makefile MACFILE=${ICE_CASEDIR}/Macros.${ICE_MACHCOMP} makdep || exit 2 echo "building cice > ${ICE_OBJDIR}/${ICE_BLDLOG_FILE}" @@ -110,12 +104,10 @@ if (${ICE_CLEANBUILD} == 'true') then echo "gmake clean" if (${quiet} == "true") then ${ICE_MACHINE_MAKE} VPFILE=Filepath EXEC=${ICE_RUNDIR}/cice \ - CPPDEFS="${ICE_CPPDEFS}" \ -f ${ICE_CASEDIR}/Makefile MACFILE=${ICE_CASEDIR}/Macros.${ICE_MACHCOMP} clean >& ${ICE_BLDLOG_FILE} else ${ICE_MACHINE_MAKE} VPFILE=Filepath EXEC=${ICE_RUNDIR}/cice \ - CPPDEFS="${ICE_CPPDEFS}" \ - -f ${ICE_CASEDIR}/Makefile MACFILE=${ICE_CASEDIR}/Macros.${ICE_MACHCOMP} clean | tee ${ICE_BLDLOG_FILE} + -f ${ICE_CASEDIR}/Makefile MACFILE=${ICE_CASEDIR}/Macros.${ICE_MACHCOMP} clean |& tee ${ICE_BLDLOG_FILE} endif endif @@ -123,23 +115,35 @@ echo "gmake cice" if (${quiet} == "true") then echo " quiet mode on... patience" ${ICE_MACHINE_MAKE} -j ${ICE_MACHINE_BLDTHRDS} VPFILE=Filepath EXEC=${ICE_RUNDIR}/cice \ - CPPDEFS="${ICE_CPPDEFS}" \ -f ${ICE_CASEDIR}/Makefile MACFILE=${ICE_CASEDIR}/Macros.${ICE_MACHCOMP} >& ${ICE_BLDLOG_FILE} set bldstat = ${status} else ${ICE_MACHINE_MAKE} -j ${ICE_MACHINE_BLDTHRDS} VPFILE=Filepath EXEC=${ICE_RUNDIR}/cice \ - CPPDEFS="${ICE_CPPDEFS}" \ - -f ${ICE_CASEDIR}/Makefile MACFILE=${ICE_CASEDIR}/Macros.${ICE_MACHCOMP} | tee ${ICE_BLDLOG_FILE} + -f ${ICE_CASEDIR}/Makefile MACFILE=${ICE_CASEDIR}/Macros.${ICE_MACHCOMP} |& tee ${ICE_BLDLOG_FILE} set bldstat = ${status} endif if !(-d ${ICE_LOGDIR}) mkdir -p ${ICE_LOGDIR} cp -p ${ICE_BLDLOG_FILE} ${ICE_LOGDIR}/ +echo " " + +# special effort to trap and highlight circular dependencies errors +set circularstat = `grep "make: Circular" ${ICE_BLDLOG_FILE} | wc -l` +if (${circularstat} > 0) then + echo "${0}: ERROR: Circular dependencies found" + grep "make: Circular" ${ICE_BLDLOG_FILE} + if (-e ${ICE_RUNDIR}/cice) then + echo "${0}: Deleting cice executable" + rm ${ICE_RUNDIR}/cice + endif + set bldstat = 55 +endif + if (${bldstat} != 0) then echo "${0}: COMPILE FAILED, see" echo " cat ${ICE_OBJDIR}/${ICE_BLDLOG_FILE}" - if (${quiet} == "true") then + if (${quiet} == "true" && ${circularstat} == 0) then tail -10 ${ICE_OBJDIR}/${ICE_BLDLOG_FILE} endif if ( ${ICE_TEST} != ${ICE_SPVAL} ) then diff --git a/configuration/scripts/cice.launch.csh b/configuration/scripts/cice.launch.csh index 709c3013c..77e237683 100755 --- a/configuration/scripts/cice.launch.csh +++ b/configuration/scripts/cice.launch.csh @@ -14,6 +14,7 @@ set maxtpn = ${ICE_MACHINE_TPNODE} @ ncores = ${ntasks} * ${nthrds} @ taskpernode = ${maxtpn} / $nthrds +if (${taskpernode} == 0) set taskpernode = 1 @ nnodes = ${ntasks} / ${taskpernode} if (${nnodes} * ${taskpernode} < ${ntasks}) @ nnodes = $nnodes + 1 set taskpernodelimit = ${taskpernode} @@ -21,21 +22,41 @@ if (${taskpernodelimit} > ${ntasks}) set taskpernodelimit = ${ntasks} @ corespernode = ${taskpernodelimit} * ${nthrds} #========================================== - if (${ICE_MACHINE} =~ cheyenne*) then +if (${ICE_COMMDIR} =~ serial*) then +cat >> ${jobfile} << EOFR +./cice >&! \$ICE_RUNLOG_FILE +EOFR +else cat >> ${jobfile} << EOFR mpiexec_mpt -n ${ntasks} ./cice >&! \$ICE_RUNLOG_FILE EOFR +endif + #======= else if (${ICE_MACHINE} =~ hobart*) then +if (${ICE_COMMDIR} =~ serial*) then +cat >> ${jobfile} << EOFR +./cice >&! \$ICE_RUNLOG_FILE +EOFR +else cat >> ${jobfile} << EOFR mpiexec -n ${ntasks} ./cice >&! \$ICE_RUNLOG_FILE EOFR +endif + #======= else if (${ICE_MACHINE} =~ thunder*) then +if (${ICE_COMMDIR} =~ serial*) then +cat >> ${jobfile} << EOFR +./cice >&! \$ICE_RUNLOG_FILE +EOFR +else cat >> ${jobfile} << EOFR mpiexec_mpt -np ${ntasks} omplace ./cice >&! \$ICE_RUNLOG_FILE EOFR +endif + #======= else if (${ICE_MACHINE} =~ gaffney* || ${ICE_MACHINE} =~ koehr*) then if (${ICE_COMMDIR} =~ serial*) then @@ -47,11 +68,19 @@ cat >> ${jobfile} << EOFR mpiexec_mpt -np ${ntasks} omplace ./cice >&! \$ICE_RUNLOG_FILE EOFR endif + #======= else if (${ICE_MACHINE} =~ onyx*) then +if (${ICE_COMMDIR} =~ serial*) then +cat >> ${jobfile} << EOFR +./cice >&! \$ICE_RUNLOG_FILE +EOFR +else cat >> ${jobfile} << EOFR aprun -n ${ntasks} -N ${taskpernodelimit} -d ${nthrds} ./cice >&! \$ICE_RUNLOG_FILE EOFR +endif + #======= else if (${ICE_MACHINE} =~ gordon* || ${ICE_MACHINE} =~ conrad*) then if (${ICE_COMMDIR} =~ serial*) then @@ -63,39 +92,127 @@ cat >> ${jobfile} << EOFR aprun -n ${ntasks} -N ${taskpernodelimit} -d ${nthrds} ./cice >&! \$ICE_RUNLOG_FILE EOFR endif + #======= else if (${ICE_MACHINE} =~ cori*) then +if (${ICE_COMMDIR} =~ serial*) then cat >> ${jobfile} << EOFR -srun -n ${ntasks} -c ${nthrds} ./cice >&! \$ICE_RUNLOG_FILE +./cice >&! \$ICE_RUNLOG_FILE EOFR +else +cat >> ${jobfile} << EOFR +srun --cpu-bind=cores ./cice >&! \$ICE_RUNLOG_FILE +EOFR +endif + #======= else if (${ICE_MACHINE} =~ badger*) then +if (${ICE_COMMDIR} =~ serial*) then +cat >> ${jobfile} << EOFR +./cice >&! \$ICE_RUNLOG_FILE +EOFR +else cat >> ${jobfile} << EOFR mpirun -np ${ntasks} ./cice >&! \$ICE_RUNLOG_FILE EOFR +endif + #======= else if (${ICE_MACHINE} =~ fram*) then +if (${ICE_COMMDIR} =~ serial*) then +cat >> ${jobfile} << EOFR +./cice >&! \$ICE_RUNLOG_FILE +EOFR +else cat >> ${jobfile} << EOFR mpirun -np ${ntasks} ./cice >&! \$ICE_RUNLOG_FILE EOFR +endif + #======= else if (${ICE_MACHINE} =~ cesium*) then +if (${ICE_COMMDIR} =~ serial*) then +cat >> ${jobfile} << EOFR +./cice >&! \$ICE_RUNLOG_FILE +EOFR +else +cat >> ${jobfile} << EOFR +mpirun -np ${ntasks} ./cice >&! \$ICE_RUNLOG_FILE +EOFR +endif + +#======= +else if (${ICE_MACHINE} =~ millikan*) then +if (${ICE_COMMDIR} =~ serial*) then +cat >> ${jobfile} << EOFR +./cice >&! \$ICE_RUNLOG_FILE +EOFR +else +cat >> ${jobfile} << EOFR +mpirun -np ${ntasks} ./cice >&! \$ICE_RUNLOG_FILE +EOFR +endif + +#======= +else if (${ICE_MACHINE} =~ brooks*) then +if (${ICE_COMMDIR} =~ serial*) then +cat >> ${jobfile} << EOFR +./cice >&! \$ICE_RUNLOG_FILE +EOFR +else +cat >> ${jobfile} << EOFR +aprun -n ${ntasks} -N ${taskpernodelimit} -d ${nthrds} ./cice >&! \$ICE_RUNLOG_FILE +EOFR +endif + +#======= +else if (${ICE_MACHINE} =~ theia*) then +cat >> ${jobfile} << EOFR +#mpirun -np ${ntasks} ./cice >&! \$ICE_RUNLOG_FILE +srun -n ${ntasks} -c ${nthrds} ./cice >&! \$ICE_RUNLOG_FILE +#./cice >&! \$ICE_RUNLOG_FILE +EOFR +#======= +else if (${ICE_MACHINE} =~ high_Sierra*) then cat >> ${jobfile} << EOFR mpirun -np ${ntasks} ./cice >&! \$ICE_RUNLOG_FILE +#./cice >&! \$ICE_RUNLOG_FILE +EOFR + +#======= +else if (${ICE_MACHINE} =~ phase2*) then +cat >> ${jobfile} << EOFR +mpirun -np ${ntasks} ./cice >&! \$ICE_RUNLOG_FILE +#./cice >&! \$ICE_RUNLOG_FILE +EOFR + +#======= +else if (${ICE_MACHINE} =~ phase3*) then +cat >> ${jobfile} << EOFR +mpirun -np ${ntasks} ./cice >&! \$ICE_RUNLOG_FILE +#./cice >&! \$ICE_RUNLOG_FILE EOFR #======= else if (${ICE_MACHINE} =~ testmachine*) then cat >> ${jobfile} << EOFR ./cice >&! \$ICE_RUNLOG_FILE EOFR + #======= else if (${ICE_MACHINE} =~ travisCI*) then +if (${ICE_COMMDIR} =~ serial*) then +cat >> ${jobfile} << EOFR +./cice >&! \$ICE_RUNLOG_FILE +EOFR +else cat >> ${jobfile} << EOFR mpirun -np ${ntasks} ./cice >&! \$ICE_RUNLOG_FILE EOFR +endif #cat >> ${jobfile} << EOFR #srun -n ${ntasks} -c ${nthrds} ./cice >&! \$ICE_RUNLOG_FILE #EOFR + #======= else echo "${0} ERROR ${ICE_MACHINE} unknown" diff --git a/configuration/scripts/cice.settings b/configuration/scripts/cice.settings index 68d297634..0513f1dd1 100755 --- a/configuration/scripts/cice.settings +++ b/configuration/scripts/cice.settings @@ -21,12 +21,13 @@ setenv ICE_NTASKS undefined setenv ICE_NTHRDS undefined setenv ICE_TEST undefined setenv ICE_TESTNAME undefined +setenv ICE_TESTID undefined setenv ICE_BASELINE undefined setenv ICE_BASEGEN undefined setenv ICE_BASECOM undefined setenv ICE_BFBCOMP undefined setenv ICE_SPVAL undefined -setenv ICE_RUNLENGTH 0 +setenv ICE_RUNLENGTH -1 setenv ICE_ACCOUNT undefined setenv ICE_QUEUE undefined @@ -38,6 +39,5 @@ setenv ICE_COMMDIR mpi if (${ICE_NTASKS} == 1) setenv ICE_COMMDIR serial ### Specialty code -setenv DITTO no # reproducible diagnostics setenv ICE_BLDDEBUG false # build debug flags diff --git a/configuration/scripts/ice_in b/configuration/scripts/ice_in index 068af6812..d78e2766a 100644 --- a/configuration/scripts/ice_in +++ b/configuration/scripts/ice_in @@ -21,7 +21,7 @@ dumpfreq = 'd' dumpfreq_n = 1 dump_last = .false. - bfbflag = .false. + bfbflag = 'off' diagfreq = 24 diag_type = 'stdout' diag_file = 'ice_diag.d' @@ -102,7 +102,7 @@ kdyn = 1 ndte = 240 revised_evp = .false. - evp_kernel_ver = 0 + kevp_kernel = 0 brlx = 300.0 arlx = 300.0 advection = 'remap' diff --git a/configuration/scripts/machines/Macros.badger_intel b/configuration/scripts/machines/Macros.badger_intel index 54cdb1493..9a92262c2 100644 --- a/configuration/scripts/machines/Macros.badger_intel +++ b/configuration/scripts/machines/Macros.badger_intel @@ -3,7 +3,7 @@ #============================================================================== CPP := fpp -CPPDEFS := -DFORTRANUNDERSCORE -DNO_R16 -DHAVE_F2008_CONTIGUOUS -DLINUX -DCPRINTEL ${ICE_CPPDEFS} +CPPDEFS := -DFORTRANUNDERSCORE ${ICE_CPPDEFS} CFLAGS := -c -O2 -fp-model precise -xHost FIXEDFLAGS := -132 @@ -17,42 +17,37 @@ else FFLAGS += -O2 endif +SCC := icc +SFC := ifort +MPICC := mpicc +MPIFC := mpif90 + ifeq ($(ICE_COMMDIR), mpi) - FC := mpif90 + FC := $(MPIFC) + CC := $(MPICC) else - FC := ifort + FC := $(SFC) + CC := $(SCC) endif +LD:= $(FC) -MPICC:= mpicc - -MPIFC:= mpif90 -LD:= $(MPIFC) - -# set in Macros file -NETCDF_PATH := /usr/projects/climate/SHARED_CLIMATE/software/conejo/netcdf/3.6.3/intel-13.0.1 -PNETCDF_PATH := /usr/projects/climate/SHARED_CLIMATE/software/conejo/parallel-netcdf/1.3.1/intel-13.0.1/openmpi-1.6.3 -#LAPACK_LIBDIR := /glade/apps/opt/lapack/3.4.2/intel/12.1.5/lib +NETCDF_PATH := /usr/projects/hpcsoft/toss3/common/netcdf/4.4.0_intel-18.0.5 +PNETCDF_PATH := /usr/projects/hpcsoft/toss3/badger/netcdf/4.4.0_intel-18.0.5_openmpi-2.1.2 PIO_CONFIG_OPTS:= --enable-filesystem-hints=gpfs ifeq ($(ICE_IOTYPE), netcdf) - INCLDIR := $(INCLDIR) -I$(NETCDF_PATH)/include -# INCLDIR := $(INCLDIR) -I/usr/projects/climate/SHARED_CLIMATE/software/conejo/netcdf/3.6.3/intel-13.0.1/include + INCLDIR := $(INCLDIR) -I$(NETCDF_PATH)/include -I$(PNETCDF_PATH)/include LIB_NETCDF := $(NETCDF_PATH)/lib LIB_PNETCDF := $(PNETCDF_PATH)/lib LIB_MPI := $(IMPILIBDIR) - #SLIBS := -L$(LIB_NETCDF) -lnetcdf -L$(LIB_PNETCDF) -lpnetcdf -L$(LAPACK_LIBDIR) -llapack -lblas - SLIBS := -L$(LIB_NETCDF) -lnetcdf -L$(LIB_PNETCDF) -lpnetcdf + SLIBS := -L$(LIB_NETCDF) -lnetcdf -L$(LIB_PNETCDF) -lnetcdff else SLIBS := endif -SCC:= icc - -SFC:= ifort - ifeq ($(ICE_THREADED), true) LDFLAGS += -qopenmp CFLAGS += -qopenmp diff --git a/configuration/scripts/machines/Macros.brooks_intel b/configuration/scripts/machines/Macros.brooks_intel new file mode 100644 index 000000000..5bd725143 --- /dev/null +++ b/configuration/scripts/machines/Macros.brooks_intel @@ -0,0 +1,78 @@ +#============================================================================== +# Makefile macros for ECCC brooks +#============================================================================== +# For use with intel compiler +#============================================================================== + +CPP := fpp +CPPDEFS := -DFORTRANUNDERSCORE ${ICE_CPPDEFS} +CFLAGS := -c -O2 -fp-model precise +#-xHost + +FIXEDFLAGS := -132 +FREEFLAGS := -FR +FFLAGS := -fp-model source -convert big_endian -assume byterecl -ftz -traceback -diag-disable 5140 +#-xHost +FFLAGS_NOOPT:= -O0 + +ifeq ($(ICE_BLDDEBUG), true) + FFLAGS += -O0 -g -check -fpe0 -ftrapuv -fp-model except -check noarg_temp_created +# -heap-arrays 1024 +else + FFLAGS += -O2 +endif + +SCC := cc +SFC := ftn +MPICC := cc +MPIFC := ftn + +ifeq ($(ICE_COMMDIR), mpi) + FC := $(MPIFC) + CC := $(MPICC) +else + FC := $(SFC) + CC := $(SCC) +endif +LD:= $(FC) + +#NETCDF_PATH := /fs/ssm/hpco/tmp/eccc/201402/04/intel-2016.1.150/ubuntu-14.04-amd64-64/ + +PIO_CONFIG_OPTS:= --enable-filesystem-hints=gpfs + +#PNETCDF_PATH := $(PNETCDF) +#PNETCDF_PATH := /glade/u/apps/ch/opt/pio/2.2/mpt/2.15f/intel/17.0.1/lib + +INCLDIR := $(INCLDIR) + +LIB_NETCDF := $(NETCDF_PATH)/lib +LIB_PNETCDF := $(PNETCDF_PATH)/lib +LIB_MPI := $(IMPILIBDIR) + +#SLIBS := -L$(LIB_NETCDF) -lnetcdff -lnetcdf -L$(LIB_PNETCDF) -lpnetcdf -lgptl +#SLIBS := -L$(LIB_NETCDF) -lnetcdff -lnetcdf + +ifeq ($(ICE_IOTYPE), netcdf) + INCLDIR += $(shell nf-config --fflags) + SLIBS := $(shell nf-config --flibs) +endif + + +ifeq ($(ICE_THREADED), true) + LDFLAGS += -qopenmp + CFLAGS += -qopenmp + FFLAGS += -qopenmp +endif + +### if using parallel I/O, load all 3 libraries. PIO must be first! +ifeq ($(IO_TYPE), pio) + PIO_PATH:=/glade/u/apps/ch/opt/pio/2.2/mpt/2.15f/intel/17.0.1/lib + INCLDIR += -I/glade/u/apps/ch/opt/pio/2.2/mpt/2.15f/intel/17.0.1/include + SLIBS := $(SLIBS) -L$(PIO_PATH) -lpiof + + CPPDEFS := $(CPPDEFS) -Dncdf +endif + +ifeq ($(IO_TYPE), netcdf) + CPPDEFS := $(CPPDEFS) -Dncdf +endif diff --git a/configuration/scripts/machines/Macros.cesium_intel b/configuration/scripts/machines/Macros.cesium_intel index 454ba859a..aca33b9fd 100644 --- a/configuration/scripts/machines/Macros.cesium_intel +++ b/configuration/scripts/machines/Macros.cesium_intel @@ -5,7 +5,7 @@ #============================================================================== CPP := fpp -CPPDEFS := -DFORTRANUNDERSCORE -DNO_R16 -DHAVE_F2008_CONTIGUOUS -DLINUX -DCPRINTEL ${CICE_CPPDEFS} +CPPDEFS := -DFORTRANUNDERSCORE ${ICE_CPPDEFS} CFLAGS := -c -O2 -fp-model precise #-xHost @@ -16,23 +16,27 @@ FFLAGS := -fp-model source -convert big_endian -assume byterecl -ftz -traceb FFLAGS_NOOPT:= -O0 ifeq ($(ICE_BLDDEBUG), true) - FFLAGS += -O0 -g -check -fpe0 -ftrapuv -fp-model except + FFLAGS += -O0 -g -check -fpe0 -ftrapuv -fp-model except -check noarg_temp_created -init=snan,arrays +# -heap-arrays 1024 else FFLAGS += -O2 endif -ifeq ($(CICE_COMMDIR), mpi) - FC := s.f90 -mpi +SCC := icc +SFC := ifort +MPICC := mpicc +MPIFC := mpif90 + +ifeq ($(ICE_COMMDIR), mpi) + FC := $(MPIFC) + CC := $(MPICC) else - FC := s.f90 + FC := $(SFC) + CC := $(SCC) endif +LD:= $(FC) -MPICC:= s.cc -mpi - -MPIFC:= s.f90 -mpi -LD:= $(MPIFC) - -NETCDF_PATH := $(NETCDF) +NETCDF_PATH := /fs/ssm/hpco/tmp/eccc/201402/04/intel-2016.1.150/ubuntu-14.04-amd64-64/ PIO_CONFIG_OPTS:= --enable-filesystem-hints=gpfs @@ -48,20 +52,12 @@ LIB_MPI := $(IMPILIBDIR) #SLIBS := -L$(LIB_NETCDF) -lnetcdff -lnetcdf -L$(LIB_PNETCDF) -lpnetcdf -lgptl SLIBS := -L$(LIB_NETCDF) -lnetcdff -lnetcdf -SCC:= s.cc - -SFC:= s.f90 - ifeq ($(compile_threaded), true) LDFLAGS += -openmp CFLAGS += -openmp FFLAGS += -openmp endif -ifeq ($(DITTO), yes) - CPPDEFS := $(CPPDEFS) -DREPRODUCIBLE -endif - ### if using parallel I/O, load all 3 libraries. PIO must be first! ifeq ($(IO_TYPE), pio) PIO_PATH:=/glade/u/apps/ch/opt/pio/2.2/mpt/2.15f/intel/17.0.1/lib diff --git a/configuration/scripts/machines/Macros.cheyenne_intel b/configuration/scripts/machines/Macros.cheyenne_intel index f5a01b24d..e2b80d023 100755 --- a/configuration/scripts/machines/Macros.cheyenne_intel +++ b/configuration/scripts/machines/Macros.cheyenne_intel @@ -3,7 +3,7 @@ #============================================================================== CPP := fpp -CPPDEFS := -DFORTRANUNDERSCORE -DNO_R16 -DHAVE_F2008_CONTIGUOUS -DLINUX -DCPRINTEL ${ICE_CPPDEFS} +CPPDEFS := -DFORTRANUNDERSCORE ${ICE_CPPDEFS} CFLAGS := -c -O2 -fp-model precise -xHost FIXEDFLAGS := -132 @@ -17,35 +17,35 @@ else FFLAGS += -O2 endif +SCC := icc +SFC := ifort +MPICC := mpicc +MPIFC := mpif90 + ifeq ($(ICE_COMMDIR), mpi) - FC := mpif90 + FC := $(MPIFC) + CC := $(MPICC) else - FC := ifort + FC := $(SFC) + CC := $(SCC) endif - -MPICC:= mpicc - -MPIFC:= mpif90 -LD:= $(MPIFC) +LD:= $(FC) NETCDF_PATH := $(NETCDF) PIO_CONFIG_OPTS:= --enable-filesystem-hints=gpfs #PNETCDF_PATH := $(PNETCDF) -PNETCDF_PATH := /glade/u/apps/ch/opt/pio/2.2/mpt/2.15f/intel/17.0.1/lib +#PNETCDF_PATH := /glade/u/apps/ch/opt/pio/2.2/mpt/2.15f/intel/17.0.1/lib INCLDIR := $(INCLDIR) LIB_NETCDF := $(NETCDF_PATH)/lib -LIB_PNETCDF := $(PNETCDF_PATH)/lib +#LIB_PNETCDF := $(PNETCDF_PATH)/lib LIB_MPI := $(IMPILIBDIR) -SLIBS := -L$(LIB_NETCDF) -lnetcdf -lnetcdff -L$(LIB_PNETCDF) -lpnetcdf -lgptl - -SCC:= icc - -SFC:= ifort +#SLIBS := -L$(LIB_NETCDF) -lnetcdf -lnetcdff -L$(LIB_PNETCDF) -lpnetcdf -lgptl +SLIBS := -L$(LIB_NETCDF) -lnetcdf -lnetcdff ifeq ($(compile_threaded), true) LDFLAGS += -qopenmp @@ -53,19 +53,10 @@ ifeq ($(compile_threaded), true) FFLAGS += -qopenmp endif -ifeq ($(DITTO), yes) - CPPDEFS := $(CPPDEFS) -DREPRODUCIBLE -endif - ### if using parallel I/O, load all 3 libraries. PIO must be first! ifeq ($(IO_TYPE), pio) PIO_PATH:=/glade/u/apps/ch/opt/pio/2.2/mpt/2.15f/intel/17.0.1/lib INCLDIR += -I/glade/u/apps/ch/opt/pio/2.2/mpt/2.15f/intel/17.0.1/include SLIBS := $(SLIBS) -L$(PIO_PATH) -lpiof - - CPPDEFS := $(CPPDEFS) -Dncdf endif -ifeq ($(IO_TYPE), netcdf) - CPPDEFS := $(CPPDEFS) -Dncdf -endif diff --git a/configuration/scripts/machines/Macros.conrad_cray b/configuration/scripts/machines/Macros.conrad_cray index b626c2be2..f76652bed 100644 --- a/configuration/scripts/machines/Macros.conrad_cray +++ b/configuration/scripts/machines/Macros.conrad_cray @@ -3,7 +3,7 @@ #============================================================================== CPP := ftn -e P -CPPDEFS := -DFORTRANUNDERSCORE -DNO_R16 -DHAVE_F2008_CONTIGUOUS -DLINUX -DCPRINTEL ${ICE_CPPDEFS} +CPPDEFS := -DFORTRANUNDERSCORE ${ICE_CPPDEFS} CFLAGS := -c -O2 -h fp0 FIXEDFLAGS := -132 @@ -19,16 +19,19 @@ else FFLAGS += -O2 endif +SCC := cc +SFC := ftn +MPICC := cc +MPIFC := ftn + ifeq ($(ICE_COMMDIR), mpi) - FC := ftn + FC := $(MPIFC) + CC := $(MPICC) else - FC := ftn + FC := $(SFC) + CC := $(SCC) endif - -MPICC:= cc - -MPIFC:= ftn -LD:= $(MPIFC) +LD:= $(FC) # defined by module #NETCDF_PATH := $(NETCDF) @@ -46,10 +49,6 @@ INCLDIR := $(INCLDIR) #LIB_MPI := $(IMPILIBDIR) #SLIBS := -L$(LIB_NETCDF) -lnetcdf -lnetcdff -SCC:= cc - -SFC:= ftn - ifeq ($(ICE_THREADED), false) LDFLAGS += -hnoomp CFLAGS += -hnoomp diff --git a/configuration/scripts/machines/Macros.conrad_gnu b/configuration/scripts/machines/Macros.conrad_gnu index 285ab0c42..2f54d753e 100644 --- a/configuration/scripts/machines/Macros.conrad_gnu +++ b/configuration/scripts/machines/Macros.conrad_gnu @@ -3,7 +3,7 @@ #============================================================================== CPP := ftn -E -CPPDEFS := -DFORTRANUNDERSCORE -DNO_R16 -DHAVE_F2008_CONTIGUOUS -DLINUX -DCPRINTEL ${ICE_CPPDEFS} +CPPDEFS := -DFORTRANUNDERSCORE ${ICE_CPPDEFS} CFLAGS := -c -O2 FIXEDFLAGS := -ffixed-line-length-132 @@ -17,16 +17,19 @@ else FFLAGS += -O2 endif +SCC := cc +SFC := ftn +MPICC := cc +MPIFC := ftn + ifeq ($(ICE_COMMDIR), mpi) - FC := ftn + FC := $(MPIFC) + CC := $(MPICC) else - FC := ftn + FC := $(SFC) + CC := $(SCC) endif - -MPICC:= cc - -MPIFC:= ftn -LD:= $(MPIFC) +LD:= $(FC) # defined by module #NETCDF_PATH := $(NETCDF) @@ -44,9 +47,6 @@ INCLDIR := $(INCLDIR) #LIB_MPI := $(IMPILIBDIR) #SLIBS := -L$(LIB_NETCDF) -lnetcdf -lnetcdff -SCC:= gcc -SFC:= gfortran - ifeq ($(ICE_THREADED), true) LDFLAGS += -fopenmp CFLAGS += -fopenmp diff --git a/configuration/scripts/machines/Macros.conrad_intel b/configuration/scripts/machines/Macros.conrad_intel index c0b48821f..a5cb37a5c 100644 --- a/configuration/scripts/machines/Macros.conrad_intel +++ b/configuration/scripts/machines/Macros.conrad_intel @@ -3,7 +3,7 @@ #============================================================================== CPP := fpp -CPPDEFS := -DFORTRANUNDERSCORE -DNO_R16 -DHAVE_F2008_CONTIGUOUS -DLINUX -DCPRINTEL ${ICE_CPPDEFS} +CPPDEFS := -DFORTRANUNDERSCORE ${ICE_CPPDEFS} CFLAGS := -c -O2 -fp-model precise -xHost FIXEDFLAGS := -132 @@ -18,16 +18,19 @@ else FFLAGS += -O2 endif +SCC := cc +SFC := ftn +MPICC := cc +MPIFC := ftn + ifeq ($(ICE_COMMDIR), mpi) - FC := ftn + FC := $(MPIFC) + CC := $(MPICC) else - FC := ftn + FC := $(SFC) + CC := $(SCC) endif - -MPICC:= cc - -MPIFC:= ftn -LD:= $(MPIFC) +LD:= $(FC) # defined by module #NETCDF_PATH := $(NETCDF) @@ -45,10 +48,6 @@ INCLDIR := $(INCLDIR) #LIB_MPI := $(IMPILIBDIR) #SLIBS := -L$(LIB_NETCDF) -lnetcdf -lnetcdff -SCC:= icc - -SFC:= ifort - ifeq ($(ICE_THREADED), true) LDFLAGS += -qopenmp CFLAGS += -qopenmp diff --git a/configuration/scripts/machines/Macros.conrad_pgi b/configuration/scripts/machines/Macros.conrad_pgi index be6fb1ef2..29bcda360 100644 --- a/configuration/scripts/machines/Macros.conrad_pgi +++ b/configuration/scripts/machines/Macros.conrad_pgi @@ -3,7 +3,7 @@ #============================================================================== CPP := pgcc -E -CPPDEFS := -DFORTRANUNDERSCORE -DNO_R16 -DHAVE_F2008_CONTIGUOUS -DLINUX -DCPRINTEL ${ICE_CPPDEFS} +CPPDEFS := -DFORTRANUNDERSCORE -DNO_R16 ${ICE_CPPDEFS} CFLAGS := -c -O2 -Kieee FIXEDFLAGS := -Mextend @@ -17,16 +17,19 @@ else FFLAGS += -O -g endif +SCC := cc +SFC := ftn +MPICC := cc +MPIFC := ftn + ifeq ($(ICE_COMMDIR), mpi) - FC := ftn + FC := $(MPIFC) + CC := $(MPICC) else - FC := ftn + FC := $(SFC) + CC := $(SCC) endif - -MPICC:= cc - -MPIFC:= ftn -LD:= $(MPIFC) +LD:= $(FC) # defined by module #NETCDF_PATH := $(NETCDF) @@ -44,10 +47,6 @@ INCLDIR := $(INCLDIR) #LIB_MPI := $(IMPILIBDIR) #SLIBS := -L$(LIB_NETCDF) -lnetcdf -lnetcdff -SCC:= cc - -SFC:= ftn - ifeq ($(ICE_THREADED), true) LDFLAGS += -mp CFLAGS += -mp diff --git a/configuration/scripts/machines/Macros.cori_intel b/configuration/scripts/machines/Macros.cori_intel index cd788649f..8e006e4cc 100644 --- a/configuration/scripts/machines/Macros.cori_intel +++ b/configuration/scripts/machines/Macros.cori_intel @@ -3,7 +3,7 @@ #============================================================================== CPP := fpp -CPPDEFS := -DFORTRANUNDERSCORE -DNO_R16 -DHAVE_F2008_CONTIGUOUS -DLINUX -DCPRINTEL ${ICE_CPPDEFS} +CPPDEFS := -DFORTRANUNDERSCORE ${ICE_CPPDEFS} CFLAGS := -c -O2 -fp-model precise -xHost FIXEDFLAGS := -132 @@ -17,16 +17,19 @@ else FFLAGS += -O2 endif +SCC := cc +SFC := ftn +MPICC := cc +MPIFC := ftn + ifeq ($(ICE_COMMDIR), mpi) - FC := ftn + FC := $(MPIFC) + CC := $(MPICC) else - FC := ftn + FC := $(SFC) + CC := $(SCC) endif - -MPICC:= cc - -MPIFC:= ftn -LD:= $(MPIFC) +LD:= $(FC) # defined by module #NETCDF_PATH := $(NETCDF) @@ -44,10 +47,6 @@ INCLDIR := $(INCLDIR) #LIB_MPI := $(IMPILIBDIR) #SLIBS := -L$(LIB_NETCDF) -lnetcdf -lnetcdff -SCC:= icc - -SFC:= ifort - ifeq ($(ICE_THREADED), true) LDFLAGS += -qopenmp CFLAGS += -qopenmp diff --git a/configuration/scripts/machines/Macros.fram_intel b/configuration/scripts/machines/Macros.fram_intel index 5181a9e18..406f3b6c3 100755 --- a/configuration/scripts/machines/Macros.fram_intel +++ b/configuration/scripts/machines/Macros.fram_intel @@ -5,7 +5,7 @@ #============================================================================== CPP := fpp -CPPDEFS := -DFORTRANUNDERSCORE -DNO_R16 -DHAVE_F2008_CONTIGUOUS -DLINUX -DCPRINTEL ${CICE_CPPDEFS} +CPPDEFS := -DFORTRANUNDERSCORE ${ICE_CPPDEFS} CFLAGS := -c -O2 -fp-model precise #-xHost @@ -15,18 +15,28 @@ FFLAGS := -O2 -fp-model precise -convert big_endian -assume byterecl -ftz -t #-xHost FFLAGS_NOOPT:= -O0 -ifeq ($(CICE_COMMDIR), mpi) - FC := s.f90 -mpi +ifeq ($(ICE_BLDDEBUG), true) + FFLAGS += -O0 -g -check -fpe0 -ftrapuv -fp-model except -check noarg_temp_created -init=snan,arrays +# -heap-arrays 1024 else - FC := s.f90 + FFLAGS += -O2 endif -MPICC:= s.cc -mpi +SCC := icc +SFC := ifort +MPICC := mpicc +MPIFC := mpif90 -MPIFC:= s.f90 -mpi -LD:= $(MPIFC) +ifeq ($(ICE_COMMDIR), mpi) + FC := $(MPIFC) + CC := $(MPICC) +else + FC := $(SFC) + CC := $(SCC) +endif +LD:= $(FC) -NETCDF_PATH := $(NETCDF) +NETCDF_PATH := /fs/ssm/hpco/tmp/eccc/201402/04/intel-2016.1.150/ubuntu-14.04-amd64-64/ PIO_CONFIG_OPTS:= --enable-filesystem-hints=gpfs @@ -42,20 +52,12 @@ LIB_MPI := $(IMPILIBDIR) #SLIBS := -L$(LIB_NETCDF) -lnetcdff -lnetcdf -L$(LIB_PNETCDF) -lpnetcdf -lgptl SLIBS := -L$(LIB_NETCDF) -lnetcdff -lnetcdf -SCC:= s.cc - -SFC:= s.f90 - ifeq ($(compile_threaded), true) LDFLAGS += -openmp CFLAGS += -openmp FFLAGS += -openmp endif -ifeq ($(DITTO), yes) - CPPDEFS := $(CPPDEFS) -DREPRODUCIBLE -endif - ### if using parallel I/O, load all 3 libraries. PIO must be first! ifeq ($(IO_TYPE), pio) PIO_PATH:=/glade/u/apps/ch/opt/pio/2.2/mpt/2.15f/intel/17.0.1/lib diff --git a/configuration/scripts/machines/Macros.gaffney_gnu b/configuration/scripts/machines/Macros.gaffney_gnu index c2aaf8ba4..b2f178247 100644 --- a/configuration/scripts/machines/Macros.gaffney_gnu +++ b/configuration/scripts/machines/Macros.gaffney_gnu @@ -3,7 +3,7 @@ #============================================================================== CPP := ftn -E -CPPDEFS := -DFORTRANUNDERSCORE -DNO_R16 -DHAVE_F2008_CONTIGUOUS -DLINUX -DCPRINTEL ${ICE_CPPDEFS} +CPPDEFS := -DFORTRANUNDERSCORE ${ICE_CPPDEFS} CFLAGS := -c -O2 FIXEDFLAGS := -ffixed-line-length-132 @@ -17,17 +17,20 @@ else FFLAGS += -O2 endif +SCC := gcc +SFC := gfortran +MPICC := gcc +MPIFC := gfortran + ifeq ($(ICE_COMMDIR), mpi) - FC := mpif90 + FC := $(MPIFC) + CC := $(MPICC) LDFLAGS += -lmpi else - FC := gfortran + FC := $(SFC) + CC := $(SCC) endif - -MPICC:= mpicc -MPIFC:= mpif90 -#LD:= $(MPIFC) -LD:= gfortran +LD:= $(FC) # defined by module #NETCDF_PATH := $(NETCDF) @@ -45,10 +48,6 @@ LIB_NETCDF := $(NETCDF_PATH)/lib #LIB_MPI := $(IMPILIBDIR) SLIBS := -L$(LIB_NETCDF) -lnetcdf -lnetcdff -SCC:= icc - -SFC:= ifort - ifeq ($(ICE_THREADED), true) LDFLAGS += -fopenmp CFLAGS += -fopenmp diff --git a/configuration/scripts/machines/Macros.gaffney_intel b/configuration/scripts/machines/Macros.gaffney_intel index 465d7d0e1..bfad4b54c 100644 --- a/configuration/scripts/machines/Macros.gaffney_intel +++ b/configuration/scripts/machines/Macros.gaffney_intel @@ -3,7 +3,7 @@ #============================================================================== CPP := fpp -CPPDEFS := -DFORTRANUNDERSCORE -DNO_R16 -DHAVE_F2008_CONTIGUOUS -DLINUX -DCPRINTEL ${ICE_CPPDEFS} +CPPDEFS := -DFORTRANUNDERSCORE ${ICE_CPPDEFS} CFLAGS := -c -O2 -fp-model precise -xHost FIXEDFLAGS := -132 @@ -18,17 +18,20 @@ else FFLAGS += -O2 endif +SCC := icc +SFC := ifort +MPICC := icc +MPIFC := ifort + ifeq ($(ICE_COMMDIR), mpi) - FC := ifort + FC := $(MPIFC) + CC := $(MPICC) LDFLAGS += -lmpi else - FC := ifort + FC := $(SFC) + CC := $(SCC) endif - -MPICC:= icc - -MPIFC:= ifort -LD:= $(MPIFC) +LD:= $(FC) # defined by module #NETCDF_PATH := $(NETCDF) @@ -46,10 +49,6 @@ LIB_NETCDF := $(NETCDF_PATH)/lib #LIB_MPI := $(IMPILIBDIR) SLIBS := -L$(LIB_NETCDF) -lnetcdf -lnetcdff -SCC:= icc - -SFC:= ifort - ifeq ($(ICE_THREADED), true) LDFLAGS += -qopenmp CFLAGS += -qopenmp diff --git a/configuration/scripts/machines/Macros.gordon_cray b/configuration/scripts/machines/Macros.gordon_cray index fb46c2641..4f9c43da1 100644 --- a/configuration/scripts/machines/Macros.gordon_cray +++ b/configuration/scripts/machines/Macros.gordon_cray @@ -1,33 +1,37 @@ #============================================================================== -# Macros file for NAVYDSRC gordon, intel compiler +# Macros file for NAVYDSRC gordon, cray compiler #============================================================================== CPP := ftn -e P -CPPDEFS := -DFORTRANUNDERSCORE -DNO_R16 -DHAVE_F2008_CONTIGUOUS -DLINUX -DCPRINTEL ${ICE_CPPDEFS} +CPPDEFS := -DFORTRANUNDERSCORE ${ICE_CPPDEFS} CFLAGS := -c -O2 -h fp0 FIXEDFLAGS := -132 FREEFLAGS := FFLAGS := -h fp0 -h byteswapio FFLAGS_NOOPT:= -O0 -LDFLAGS = -h byteswapio +LDFLAGS := -h byteswapio ifeq ($(ICE_BLDDEBUG), true) FFLAGS += -O0 -g -Rbcdps +# FFLAGS += -O0 -g -Rbcdps -ei else FFLAGS += -O2 endif +SCC := cc +SFC := ftn +MPICC := cc +MPIFC := ftn + ifeq ($(ICE_COMMDIR), mpi) - FC := ftn + FC := $(MPIFC) + CC := $(MPICC) else - FC := ftn + FC := $(SFC) + CC := $(SCC) endif - -MPICC:= cc - -MPIFC:= ftn -LD:= $(MPIFC) +LD:= $(FC) # defined by module #NETCDF_PATH := $(NETCDF) @@ -45,10 +49,6 @@ INCLDIR := $(INCLDIR) #LIB_MPI := $(IMPILIBDIR) #SLIBS := -L$(LIB_NETCDF) -lnetcdf -lnetcdff -SCC:= cc - -SFC:= ftn - ifeq ($(ICE_THREADED), false) LDFLAGS += -hnoomp CFLAGS += -hnoomp diff --git a/configuration/scripts/machines/Macros.gordon_gnu b/configuration/scripts/machines/Macros.gordon_gnu index c23be2b76..131f539c1 100644 --- a/configuration/scripts/machines/Macros.gordon_gnu +++ b/configuration/scripts/machines/Macros.gordon_gnu @@ -3,7 +3,7 @@ #============================================================================== CPP := ftn -E -CPPDEFS := -DFORTRANUNDERSCORE -DNO_R16 -DHAVE_F2008_CONTIGUOUS -DLINUX -DCPRINTEL ${ICE_CPPDEFS} +CPPDEFS := -DFORTRANUNDERSCORE ${ICE_CPPDEFS} CFLAGS := -c -O2 FIXEDFLAGS := -ffixed-line-length-132 @@ -17,16 +17,19 @@ else FFLAGS += -O2 endif +SCC := cc +SFC := ftn +MPICC := cc +MPIFC := ftn + ifeq ($(ICE_COMMDIR), mpi) - FC := ftn + FC := $(MPIFC) + CC := $(MPICC) else - FC := ftn + FC := $(SFC) + CC := $(SCC) endif - -MPICC:= cc - -MPIFC:= ftn -LD:= $(MPIFC) +LD:= $(FC) # defined by module #NETCDF_PATH := $(NETCDF) @@ -44,9 +47,6 @@ INCLDIR := $(INCLDIR) #LIB_MPI := $(IMPILIBDIR) #SLIBS := -L$(LIB_NETCDF) -lnetcdf -lnetcdff -SCC:= gcc -SFC:= gfortran - ifeq ($(ICE_THREADED), true) LDFLAGS += -fopenmp CFLAGS += -fopenmp diff --git a/configuration/scripts/machines/Macros.gordon_intel b/configuration/scripts/machines/Macros.gordon_intel index d1ed9cda7..73214ec26 100644 --- a/configuration/scripts/machines/Macros.gordon_intel +++ b/configuration/scripts/machines/Macros.gordon_intel @@ -3,7 +3,7 @@ #============================================================================== CPP := fpp -CPPDEFS := -DFORTRANUNDERSCORE -DNO_R16 -DHAVE_F2008_CONTIGUOUS -DLINUX -DCPRINTEL ${ICE_CPPDEFS} +CPPDEFS := -DFORTRANUNDERSCORE ${ICE_CPPDEFS} CFLAGS := -c -O2 -fp-model precise -xHost FIXEDFLAGS := -132 @@ -17,16 +17,19 @@ else FFLAGS += -O2 endif +SCC := cc +SFC := ftn +MPICC := cc +MPIFC := ftn + ifeq ($(ICE_COMMDIR), mpi) - FC := ftn + FC := $(MPIFC) + CC := $(MPICC) else - FC := ftn + FC := $(SFC) + CC := $(SCC) endif - -MPICC:= cc - -MPIFC:= ftn -LD:= $(MPIFC) +LD:= $(FC) # defined by module #NETCDF_PATH := $(NETCDF) @@ -44,10 +47,6 @@ INCLDIR := $(INCLDIR) #LIB_MPI := $(IMPILIBDIR) #SLIBS := -L$(LIB_NETCDF) -lnetcdf -lnetcdff -SCC:= icc - -SFC:= ifort - ifeq ($(ICE_THREADED), true) LDFLAGS += -qopenmp CFLAGS += -qopenmp diff --git a/configuration/scripts/machines/Macros.gordon_pgi b/configuration/scripts/machines/Macros.gordon_pgi index 1403cf13a..45d03c64f 100644 --- a/configuration/scripts/machines/Macros.gordon_pgi +++ b/configuration/scripts/machines/Macros.gordon_pgi @@ -3,7 +3,7 @@ #============================================================================== CPP := pgcc -Mcpp -CPPDEFS := -DFORTRANUNDERSCORE -DNO_R16 -DHAVE_F2008_CONTIGUOUS -DLINUX -DCPRINTEL ${ICE_CPPDEFS} +CPPDEFS := -DFORTRANUNDERSCORE -DNO_R16 ${ICE_CPPDEFS} CFLAGS := -c -O2 -Kieee FIXEDFLAGS := -Mextend @@ -17,16 +17,19 @@ else FFLAGS += -O -g endif +SCC := cc +SFC := ftn +MPICC := cc +MPIFC := ftn + ifeq ($(ICE_COMMDIR), mpi) - FC := ftn + FC := $(MPIFC) + CC := $(MPICC) else - FC := ftn + FC := $(SFC) + CC := $(SCC) endif - -MPICC:= cc - -MPIFC:= ftn -LD:= $(MPIFC) +LD:= $(FC) # defined by module #NETCDF_PATH := $(NETCDF) @@ -44,10 +47,6 @@ INCLDIR := $(INCLDIR) #LIB_MPI := $(IMPILIBDIR) #SLIBS := -L$(LIB_NETCDF) -lnetcdf -lnetcdff -SCC:= cc - -SFC:= ftn - ifeq ($(ICE_THREADED), true) LDFLAGS += -mp CFLAGS += -mp diff --git a/configuration/scripts/machines/Macros.high_Sierra_gnu b/configuration/scripts/machines/Macros.high_Sierra_gnu new file mode 100644 index 000000000..960779da1 --- /dev/null +++ b/configuration/scripts/machines/Macros.high_Sierra_gnu @@ -0,0 +1,47 @@ +#============================================================================== +# Makefile macros for Travis-CI - GCC and openmpi compilers +#============================================================================== + +CPP := cpp +CPPDEFS := -DFORTRANUNDERSCORE -DNO_R16 -DHAVE_F2008_CONTIGUOUS -DLINUX -DCPRINTEL ${ICE_CPPDEFS} +CFLAGS := -c -O2 + +FIXEDFLAGS := -132 +FFLAGS := -O2 -ffree-line-length-none -fconvert=big-endian -finit-real=nan +FFLAGS_NOOPT:= -O0 + +ifeq ($(ICE_BLDDEBUG), true) + #FFLAGS += -O0 -g -Wextra -fbacktrace -fbounds-check -ffpe-trap=zero,overflow + FFLAGS += -O0 -g -std=f2008 -fbacktrace -fbounds-check -ffpe-trap=zero,overflow +else + FFLAGS += -O2 +endif + +FC := mpif90 + +MPICC:= + +MPIFC:= mpif90 +LD:= $(FC) + +NETCDF_PATH := $(NETCDF) + +ifeq ($(ICE_IOTYPE), netcdf) + NETCDF_PATH := $(shell nc-config --prefix) + INCLDIR := $(INCLDIR) -I$(NETCDF_PATH)/include + LIB_NETCDF := $(NETCDF_PATH)/lib + LIB_PNETCDF := + SLIBS := -L$(LIB_NETCDF) -lnetcdf -lnetcdff +else + SLIBS := +endif + +LIB_MPI := +SCC:=gcc +SFC:= + +#ifeq ($(ICE_THREADED), true) +# LDFLAGS += -fopenmp +# CFLAGS += -fopenmp +# FFLAGS += -fopenmp +#endif diff --git a/configuration/scripts/machines/Macros.hobart_intel b/configuration/scripts/machines/Macros.hobart_intel index f4176eaf1..7e4984a16 100755 --- a/configuration/scripts/machines/Macros.hobart_intel +++ b/configuration/scripts/machines/Macros.hobart_intel @@ -4,8 +4,8 @@ CPP := /usr/bin/cpp CPPFLAGS := $(CFLAGS) -lifcore -CPPDEFS := $(CPPDEFS) -DFORTRANUNDERSCORE -DCPRINTEL -CFLAGS := -qno-opt-dynamic-align -fp-model precise -std=gnu99 +CPPDEFS := -DFORTRANUNDERSCORE ${ICE_CPPDEFS} +CFLAGS := -c -qno-opt-dynamic-align -fp-model precise -std=gnu99 FIXEDFLAGS := -fixed -132 FREEFLAGS := -free @@ -18,16 +18,19 @@ ifeq ($(ICE_BLDDEBUG), true) FFLAGS := -qno-opt-dynamic-align -convert big_endian -assume byterecl -ftz -traceback -assume realloc_lhs -fp-model source -O0 -g -check uninit -check bounds -check pointers -fpe0 -check noarg_temp_created endif +SCC := icc +SFC := ifort +MPICC := mpicc +MPIFC := mpif90 + ifeq ($(ICE_COMMDIR), mpi) - FC := mpif90 + FC := $(MPIFC) + CC := $(MPICC) else - FC := ifort + FC := $(SFC) + CC := $(SCC) endif - -MPICC:= mpicc - -MPIFC:= mpif90 -LD:= $(MPIFC) +LD:= $(FC) INCLDIR := -I$(NETCDF_PATH)/include -I$(MPI_PATH)/include @@ -37,27 +40,10 @@ LIB_MPI := $(IMPILIBDIR) SLIBS := -L$(LIB_NETCDF) -lnetcdf -lnetcdff -L/usr/lib64 -llapack -mkl=cluster -SCC:= icc - -SFC:= ifort - -ifeq ($(DITTO), yes) - CPPDEFS := $(CPPDEFS) -DREPRODUCIBLE -endif - -ifeq ($(IO_TYPE), netcdf) - CPPDEFS := $(CPPDEFS) -Dncdf -endif - ## if using parallel I/O, load all 3 libraries. PIO must be first! ifeq ($(IO_TYPE), pio) PIO_PATH:= INCLDIR += -I SLIBS := $(SLIB) -L$(PIO_PATH) -lpiofS - - CPPDEFS := $(CPPDEFS) -Dncdf endif -ifeq ($(IO_TYPE), netcdf) - CPPDEFS := $(CPPDEFS) -Dncdf -endif diff --git a/configuration/scripts/machines/Macros.hobart_nag b/configuration/scripts/machines/Macros.hobart_nag index a7f973a69..02a4bbe65 100755 --- a/configuration/scripts/machines/Macros.hobart_nag +++ b/configuration/scripts/machines/Macros.hobart_nag @@ -4,8 +4,8 @@ CPP := /usr/bin/cpp CPPFLAGS := -P -traditional -CPPDEFS := -DFORTRANUNDERSCORE -DNO_CRAY_POINTERS -DNO_SHR_VMATH -DCPRNAG $(ICE_CPPDEFS) -CFLAGS := -std=gnu99 +CPPDEFS := -DFORTRANUNDERSCORE -DNO_R16 $(ICE_CPPDEFS) +CFLAGS := -c -std=gnu99 FIXEDFLAGS := -fixed FREEFLAGS := -free @@ -18,16 +18,19 @@ ifeq ($(ICE_BLDDEBUG), true) FFLAGS := -Wp,-macro=no_com -convert=BIG_ENDIAN -wmismatch=mpi_bcast,mpi_isend,mpi_irecv,mpi_send,mpi_recv,mpi_allreduce -gline -C=all -g -time -f2003 -ieee=stop endif +SCC := cc +SFC := nagfor +MPICC := mpicc +MPIFC := mpif90 + ifeq ($(ICE_COMMDIR), mpi) - FC := mpif90 + FC := $(MPIFC) + CC := $(MPICC) else - FC := nagfor + FC := $(SFC) + CC := $(SCC) endif - -MPICC:= mpicc - -MPIFC:= mpif90 -LD:= $(MPIFC) +LD:= $(FC) INCLDIR := -I$(NETCDF_PATH)/include -I$(MPI_PATH)/include @@ -37,10 +40,6 @@ LIB_MPI := $(IMPILIBDIR) SLIBS := -L/usr/local/nag-6.2/lib/NAG_Fortran -L$(LIB_NETCDF) -lnetcdf -lnetcdff -L/usr/lib64 -llapack -lblas -SCC:= nagcc - -SFC:= nagfor - ## if using parallel I/O, load all 3 libraries. PIO must be first! ifeq ($(IO_TYPE), pio) PIO_PATH:= diff --git a/configuration/scripts/machines/Macros.koehr_intel b/configuration/scripts/machines/Macros.koehr_intel index 90482b618..d77e90768 100644 --- a/configuration/scripts/machines/Macros.koehr_intel +++ b/configuration/scripts/machines/Macros.koehr_intel @@ -3,7 +3,7 @@ #============================================================================== CPP := fpp -CPPDEFS := -DFORTRANUNDERSCORE -DNO_R16 -DHAVE_F2008_CONTIGUOUS -DLINUX -DCPRINTEL ${ICE_CPPDEFS} +CPPDEFS := -DFORTRANUNDERSCORE ${ICE_CPPDEFS} CFLAGS := -c -O2 -fp-model precise -xHost FIXEDFLAGS := -132 @@ -18,17 +18,20 @@ else FFLAGS += -O2 endif +SCC := icc +SFC := ifort +MPICC := icc +MPIFC := ifort + ifeq ($(ICE_COMMDIR), mpi) - FC := ifort + FC := $(MPIFC) + CC := $(MPICC) LDFLAGS += -lmpi else - FC := ifort + FC := $(SFC) + CC := $(SCC) endif - -MPICC:= icc - -MPIFC:= ifort -LD:= $(MPIFC) +LD:= $(FC) # defined by module #NETCDF_PATH := $(NETCDF) @@ -46,10 +49,6 @@ LIB_NETCDF := $(NETCDF_PATH)/lib #LIB_MPI := $(IMPILIBDIR) SLIBS := -L$(LIB_NETCDF) -lnetcdf -lnetcdff -SCC:= icc - -SFC:= ifort - ifeq ($(ICE_THREADED), true) LDFLAGS += -qopenmp CFLAGS += -qopenmp diff --git a/configuration/scripts/machines/Macros.loft_gnu b/configuration/scripts/machines/Macros.loft_gnu index e5e38b810..12158350e 100644 --- a/configuration/scripts/machines/Macros.loft_gnu +++ b/configuration/scripts/machines/Macros.loft_gnu @@ -4,7 +4,7 @@ CPP := /usr/bin/cpp #CPP := fpp -CPPDEFS := -DFORTRANUNDERSCORE -DNO_R16 -DHAVE_F2008_CONTIGUOUS -DLINUX -DCPRINTEL ${ICE_CPPDEFS} +CPPDEFS := -DFORTRANUNDERSCORE ${ICE_CPPDEFS} CFLAGS := -c -O2 -fp-model precise -xHost FIXEDFLAGS := -132 @@ -27,11 +27,18 @@ else FFLAGS += -O2 endif - FC := gfortran +SCC := gcc +SFC := gfortran +MPICC := gcc +MPIFC := gfortran -MPICC:= - -MPIFC:= +ifeq ($(ICE_COMMDIR), mpi) + FC := $(MPIFC) + CC := $(MPICC) +else + FC := $(SFC) + CC := $(SCC) +endif LD:= $(FC) ifeq ($(ICE_IOTYPE), netcdf) @@ -45,8 +52,6 @@ else endif LIB_MPI := -SCC:= -SFC:= ifeq ($(ICE_THREADED), true) LDFLAGS += -openmp diff --git a/configuration/scripts/machines/Macros.millikan_intel b/configuration/scripts/machines/Macros.millikan_intel new file mode 100644 index 000000000..1d74158c9 --- /dev/null +++ b/configuration/scripts/machines/Macros.millikan_intel @@ -0,0 +1,72 @@ +#============================================================================== +# Makefile macros for "millikan" +#============================================================================== +# For use with intel compiler +#============================================================================== + +CPP := fpp +CPPDEFS := -DFORTRANUNDERSCORE ${ICE_CPPDEFS} +CFLAGS := -c -O2 -fp-model precise +#-xHost + +FIXEDFLAGS := -132 +FREEFLAGS := -FR +FFLAGS := -fp-model source -convert big_endian -assume byterecl -ftz -traceback -diag-disable 5140 +#-xHost +FFLAGS_NOOPT:= -O0 + +ifeq ($(ICE_BLDDEBUG), true) + FFLAGS += -O0 -g -check -fpe0 -ftrapuv -fp-model except -check noarg_temp_created -init=snan,arrays +# -heap-arrays 1024 +else + FFLAGS += -O2 +endif + +SCC := icc +SFC := ifort +MPICC := mpicc +MPIFC := mpif90 + +ifeq ($(ICE_COMMDIR), mpi) + FC := $(MPIFC) + CC := $(MPICC) +else + FC := $(SFC) + CC := $(SCC) +endif +LD:= $(FC) + +NETCDF_PATH := /fs/ssm/hpco/tmp/eccc/201402/04/intel-2016.1.150/ubuntu-14.04-amd64-64/ + +PIO_CONFIG_OPTS:= --enable-filesystem-hints=gpfs + +#PNETCDF_PATH := $(PNETCDF) +#PNETCDF_PATH := /glade/u/apps/ch/opt/pio/2.2/mpt/2.15f/intel/17.0.1/lib + +INCLDIR := $(INCLDIR) + +LIB_NETCDF := $(NETCDF_PATH)/lib +LIB_PNETCDF := $(PNETCDF_PATH)/lib +LIB_MPI := $(IMPILIBDIR) + +#SLIBS := -L$(LIB_NETCDF) -lnetcdff -lnetcdf -L$(LIB_PNETCDF) -lpnetcdf -lgptl +SLIBS := -L$(LIB_NETCDF) -lnetcdff -lnetcdf + +ifeq ($(compile_threaded), true) + LDFLAGS += -openmp + CFLAGS += -openmp + FFLAGS += -openmp +endif + +### if using parallel I/O, load all 3 libraries. PIO must be first! +ifeq ($(IO_TYPE), pio) + PIO_PATH:=/glade/u/apps/ch/opt/pio/2.2/mpt/2.15f/intel/17.0.1/lib + INCLDIR += -I/glade/u/apps/ch/opt/pio/2.2/mpt/2.15f/intel/17.0.1/include + SLIBS := $(SLIBS) -L$(PIO_PATH) -lpiof + + CPPDEFS := $(CPPDEFS) -Dncdf +endif + +ifeq ($(IO_TYPE), netcdf) + CPPDEFS := $(CPPDEFS) -Dncdf +endif diff --git a/configuration/scripts/machines/Macros.onyx_cray b/configuration/scripts/machines/Macros.onyx_cray index c0d14eb5a..aa185e2e5 100644 --- a/configuration/scripts/machines/Macros.onyx_cray +++ b/configuration/scripts/machines/Macros.onyx_cray @@ -3,7 +3,7 @@ #============================================================================== CPP := ftn -e P -CPPDEFS := -DFORTRANUNDERSCORE -DNO_R16 -DHAVE_F2008_CONTIGUOUS -DLINUX -DCPRINTEL ${ICE_CPPDEFS} +CPPDEFS := -DFORTRANUNDERSCORE ${ICE_CPPDEFS} CFLAGS := -c -O2 -h fp0 FIXEDFLAGS := -132 @@ -18,16 +18,19 @@ else FFLAGS += -O2 endif +SCC := cc +SFC := ftn +MPICC := cc +MPIFC := ftn + ifeq ($(ICE_COMMDIR), mpi) - FC := ftn + FC := $(MPIFC) + CC := $(MPICC) else - FC := ftn + FC := $(SFC) + CC := $(SCC) endif - -MPICC:= cc - -MPIFC:= ftn -LD:= $(MPIFC) +LD:= $(FC) # defined by module #NETCDF_PATH := $(NETCDF) @@ -45,10 +48,6 @@ INCLDIR := $(INCLDIR) #LIB_MPI := $(IMPILIBDIR) #SLIBS := -L$(LIB_NETCDF) -lnetcdf -lnetcdff -SCC:= cc - -SFC:= ftn - ifeq ($(ICE_THREADED), false) LDFLAGS += -hnoomp CFLAGS += -hnoomp diff --git a/configuration/scripts/machines/Macros.onyx_gnu b/configuration/scripts/machines/Macros.onyx_gnu index b87878d1e..14784e625 100644 --- a/configuration/scripts/machines/Macros.onyx_gnu +++ b/configuration/scripts/machines/Macros.onyx_gnu @@ -3,7 +3,7 @@ #============================================================================== CPP := ftn -E -CPPDEFS := -DFORTRANUNDERSCORE -DNO_R16 -DHAVE_F2008_CONTIGUOUS -DLINUX -DCPRINTEL ${ICE_CPPDEFS} +CPPDEFS := -DFORTRANUNDERSCORE ${ICE_CPPDEFS} CFLAGS := -c -O2 FIXEDFLAGS := -ffixed-line-length-132 @@ -17,16 +17,19 @@ else FFLAGS += -O2 endif +SCC := cc +SFC := ftn +MPICC := cc +MPIFC := ftn + ifeq ($(ICE_COMMDIR), mpi) - FC := ftn + FC := $(MPIFC) + CC := $(MPICC) else - FC := ftn + FC := $(SFC) + CC := $(SCC) endif - -MPICC:= cc - -MPIFC:= ftn -LD:= $(MPIFC) +LD:= $(FC) # defined by module #NETCDF_PATH := $(NETCDF) @@ -44,9 +47,6 @@ INCLDIR := $(INCLDIR) #LIB_MPI := $(IMPILIBDIR) #SLIBS := -L$(LIB_NETCDF) -lnetcdf -lnetcdff -SCC:= gcc -SFC:= gfortran - ifeq ($(ICE_THREADED), true) LDFLAGS += -fopenmp CFLAGS += -fopenmp diff --git a/configuration/scripts/machines/Macros.onyx_intel b/configuration/scripts/machines/Macros.onyx_intel index 900b46de7..b7f685ac1 100644 --- a/configuration/scripts/machines/Macros.onyx_intel +++ b/configuration/scripts/machines/Macros.onyx_intel @@ -3,7 +3,7 @@ #============================================================================== CPP := fpp -CPPDEFS := -DFORTRANUNDERSCORE -DNO_R16 -DHAVE_F2008_CONTIGUOUS -DLINUX -DCPRINTEL ${ICE_CPPDEFS} +CPPDEFS := -DFORTRANUNDERSCORE ${ICE_CPPDEFS} CFLAGS := -c -O2 -fp-model precise -xHost FIXEDFLAGS := -132 @@ -17,16 +17,19 @@ else FFLAGS += -O2 endif +SCC := cc +SFC := ftn +MPICC := cc +MPIFC := ftn + ifeq ($(ICE_COMMDIR), mpi) - FC := ftn + FC := $(MPIFC) + CC := $(MPICC) else - FC := ftn + FC := $(SFC) + CC := $(SCC) endif - -MPICC:= cc - -MPIFC:= ftn -LD:= $(MPIFC) +LD:= $(FC) # defined by module #NETCDF_PATH := $(NETCDF) @@ -44,10 +47,6 @@ INCLDIR := $(INCLDIR) #LIB_MPI := $(IMPILIBDIR) #SLIBS := -L$(LIB_NETCDF) -lnetcdf -lnetcdff -SCC:= icc - -SFC:= ifort - ifeq ($(ICE_THREADED), true) LDFLAGS += -qopenmp CFLAGS += -qopenmp diff --git a/configuration/scripts/machines/Macros.phase2_intel b/configuration/scripts/machines/Macros.phase2_intel new file mode 100755 index 000000000..6bfea6b19 --- /dev/null +++ b/configuration/scripts/machines/Macros.phase2_intel @@ -0,0 +1,57 @@ +#============================================================================== +# Makefile macros for generic test machine, intel compiler +#============================================================================== + +CPP := fpp +CPPDEFS := -DFORTRANUNDERSCORE ${ICE_CPPDEFS} +#CFLAGS := -c -O2 -fp-model precise -xHost +CFLAGS := -c -O2 + +FIXEDFLAGS := -132 +FREEFLAGS := -FR +FFLAGS := -fp-model precise -convert big_endian -assume byterecl -ftz -traceback -xHost +FFLAGS_NOOPT:= -O0 + +ifeq ($(ICE_BLDDEBUG), true) + FFLAGS += -O0 -g -check uninit -check bounds -check pointers -fpe0 -check noarg_temp_created +else + FFLAGS += -O2 +endif + +SCC := icc +SFC := ifort +MPICC := mpicc +MPIFC := mpiifort + +ifeq ($(ICE_COMMDIR), mpi) + FC := $(MPIFC) + CC := $(MPICC) +else + FC := $(SFC) + CC := $(SCC) +endif +LD:= $(FC) + +NETCDF_PATH := $(NETCDF) + +PIO_CONFIG_OPTS:= --enable-filesystem-hints=gpfs + +INCLDIR := $(INCLDIR) $(NETCDF_INCLUDE) + +LIB_NETCDF := $(NETCDF_PATH)/lib +LIB_MPI := $(IMPILIBDIR) + +SLIBS := -L$(LIB_NETCDF) -lnetcdf -lnetcdff + +ifeq ($(compile_threaded), true) + LDFLAGS += -qopenmp + CFLAGS += -qopenmp + FFLAGS += -qopenmp +endif + +### if using parallel I/O, load all 3 libraries. PIO must be first! +#ifeq ($(IO_TYPE), pio) +# PIO_PATH:=/glade/u/apps/ch/opt/pio/2.2/mpt/2.15f/intel/17.0.1/lib +# INCLDIR += -I/glade/u/apps/ch/opt/pio/2.2/mpt/2.15f/intel/17.0.1/include +# SLIBS := $(SLIBS) -L$(PIO_PATH) -lpiof +#endif diff --git a/configuration/scripts/machines/Macros.phase3_intel b/configuration/scripts/machines/Macros.phase3_intel new file mode 100755 index 000000000..c5a8b7a7e --- /dev/null +++ b/configuration/scripts/machines/Macros.phase3_intel @@ -0,0 +1,58 @@ +#============================================================================== +# Makefile macros for generic test machine, intel compiler +#============================================================================== + +CPP := fpp +CPPDEFS := -DFORTRANUNDERSCORE ${ICE_CPPDEFS} + +#CFLAGS := -c -O2 -fp-model precise -xHost +CFLAGS := -c -O2 + +FIXEDFLAGS := -132 +FREEFLAGS := -FR +FFLAGS := -fp-model precise -convert big_endian -assume byterecl -ftz -traceback -xHost +FFLAGS_NOOPT:= -O0 + +ifeq ($(ICE_BLDDEBUG), true) + FFLAGS += -O0 -g -check uninit -check bounds -check pointers -fpe0 -check noarg_temp_created +else + FFLAGS += -O2 +endif + +SCC := icc +SFC := ifort +MPICC := mpicc +MPIFC := mpiifort + +ifeq ($(ICE_COMMDIR), mpi) + FC := $(MPIFC) + CC := $(MPICC) +else + FC := $(SFC) + CC := $(SCC) +endif +LD:= $(FC) + +NETCDF_PATH := $(NETCDF) + +PIO_CONFIG_OPTS:= --enable-filesystem-hints=gpfs + +INCLDIR := $(INCLDIR) $(NETCDF_INCLUDE) + +LIB_NETCDF := $(NETCDF_PATH)/lib +LIB_MPI := $(IMPILIBDIR) + +SLIBS := -L$(LIB_NETCDF) -lnetcdf -lnetcdff + +ifeq ($(compile_threaded), true) + LDFLAGS += -qopenmp + CFLAGS += -qopenmp + FFLAGS += -qopenmp +endif + +### if using parallel I/O, load all 3 libraries. PIO must be first! +#ifeq ($(IO_TYPE), pio) +# PIO_PATH:=/glade/u/apps/ch/opt/pio/2.2/mpt/2.15f/intel/17.0.1/lib +# INCLDIR += -I/glade/u/apps/ch/opt/pio/2.2/mpt/2.15f/intel/17.0.1/include +# SLIBS := $(SLIBS) -L$(PIO_PATH) -lpiof +#endif diff --git a/configuration/scripts/machines/Macros.testmachine_intel b/configuration/scripts/machines/Macros.testmachine_intel index 6b9a31b77..0d9121667 100755 --- a/configuration/scripts/machines/Macros.testmachine_intel +++ b/configuration/scripts/machines/Macros.testmachine_intel @@ -3,7 +3,7 @@ #============================================================================== CPP := fpp -CPPDEFS := -DFORTRANUNDERSCORE -DNO_R16 -DHAVE_F2008_CONTIGUOUS -DLINUX -DCPRINTEL ${ICE_CPPDEFS} +CPPDEFS := -DFORTRANUNDERSCORE ${ICE_CPPDEFS} CFLAGS := -c -O2 -fp-model precise -xHost FIXEDFLAGS := -132 @@ -17,16 +17,19 @@ else FFLAGS += -O2 endif +SCC := icc +SFC := ifort +MPICC := mpicc +MPIFC := mpif90 + ifeq ($(ICE_COMMDIR), mpi) - FC := mpif90 + FC := $(MPIFC) + CC := $(MPICC) else - FC := ifort + FC := $(SFC) + CC := $(SCC) endif - -MPICC:= mpicc - -MPIFC:= mpif90 -LD:= $(MPIFC) +LD:= $(FC) NETCDF_PATH := $(NETCDF) @@ -43,29 +46,16 @@ LIB_MPI := $(IMPILIBDIR) SLIBS := -L$(LIB_NETCDF) -lnetcdf -lnetcdff -L$(LIB_PNETCDF) -lpnetcdf -lgptl -SCC:= icc - -SFC:= ifort - ifeq ($(compile_threaded), true) LDFLAGS += -qopenmp CFLAGS += -qopenmp FFLAGS += -qopenmp endif -ifeq ($(DITTO), yes) - CPPDEFS := $(CPPDEFS) -DREPRODUCIBLE -endif - ### if using parallel I/O, load all 3 libraries. PIO must be first! ifeq ($(IO_TYPE), pio) PIO_PATH:=/glade/u/apps/ch/opt/pio/2.2/mpt/2.15f/intel/17.0.1/lib INCLDIR += -I/glade/u/apps/ch/opt/pio/2.2/mpt/2.15f/intel/17.0.1/include SLIBS := $(SLIBS) -L$(PIO_PATH) -lpiof - - CPPDEFS := $(CPPDEFS) -Dncdf endif -ifeq ($(IO_TYPE), netcdf) - CPPDEFS := $(CPPDEFS) -Dncdf -endif diff --git a/configuration/scripts/machines/Macros.theia_gnu b/configuration/scripts/machines/Macros.theia_gnu new file mode 100644 index 000000000..63cce4b99 --- /dev/null +++ b/configuration/scripts/machines/Macros.theia_gnu @@ -0,0 +1,48 @@ +#============================================================================== +# Makefile macros for theia - intel and openmpi compilers +#============================================================================== + +CPP := cpp +CPPDEFS := -DFORTRANUNDERSCORE -DNO_R16 -DHAVE_F2008_CONTIGUOUS -DLINUX -DCPRINTEL ${ICE_CPPDEFS} +CFLAGS := -c -O2 -xHost + +FFLAGS := -fconvert=big-endian +#FFLAGS := -h bytwswapio +#FFLAGS := +FFLAGS_NOOPT:= -O0 + +ifeq ($(ICE_BLDDEBUG), true) + FFLAGS += -O0 -g -fcheck=bounds -finit-real=nan -fimplicit-none -ffpe-trap=invalid,zero,overflow +else + FFLAGS += -O2 +endif + +#Note that on theia mpif90 refers to gfortran even though it is in the intel bin +FC := mpif90 + +MPICC:= + +MPIFC:= mpif90 +LD:= $(MPIFC) + +NETCDF_PATH := $(NETCDF) + +ifeq ($(ICE_IOTYPE), netcdf) + NETCDF_PATH := $(shell nc-config --prefix) + INCLDIR := $(INCLDIR) -I$(NETCDF_PATH)/include + LIB_NETCDF := $(NETCDF_PATH)/lib + LIB_PNETCDF := + SLIBS := -L$(LIB_NETCDF) -lnetcdf -lnetcdff +else + SLIBS := +endif + +LIB_MPI := +SCC:= +SFC:= + +ifeq ($(ICE_THREADED), true) + LDFLAGS += -fopenmp + CFLAGS += -fopenmp + FFLAGS += -fopenmp +endif diff --git a/configuration/scripts/machines/Macros.theia_intel b/configuration/scripts/machines/Macros.theia_intel new file mode 100644 index 000000000..38cc62ed8 --- /dev/null +++ b/configuration/scripts/machines/Macros.theia_intel @@ -0,0 +1,60 @@ +#============================================================================== +# Makefile macros for theia - intel and openmpi compilers +#============================================================================== + +CPP := cpp +CPPDEFS := -DFORTRANUNDERSCORE -DNO_R16 -DHAVE_F2008_CONTIGUOUS -DLINUX -DCPRINTEL ${ICE_CPPDEFS} +CFLAGS := -c -O2 -xHost + +FIXEDFLAGS := -132 +FREEFLAGS := -FR +FFLAGS := -fp-model precise -convert big_endian -assume byterecl -ftz -traceback -xHost + +FFLAGS_NOOPT:= -O0 + +ifeq ($(ICE_BLDDEBUG), true) +#RG: this looks more like gfortran options: +## FFLAGS += -O0 -g -fcheck=bounds -finit-real=nan -fimplicit-none -ffpe-trap=invalid,zero,overflow + FFLAGS += -O0 -g -check-bounds -implicit-none -fpe3 +else + FFLAGS += -O2 +endif + +#Note that on theia mpif90 refers to gfortran even though it is in the intel bin +FC := mpiifort +MPIFC:= mpiifort +LD:= $(MPIFC) + +NETCDF_PATH := $(NETCDF) + +ifeq ($(ICE_IOTYPE), netcdf) + NETCDF_PATH := $(shell nc-config --prefix) + INCLDIR := $(INCLDIR) -I$(NETCDF_PATH)/include + LIB_NETCDF := $(NETCDF_PATH)/lib + LIB_PNETCDF := + SLIBS := -L$(LIB_NETCDF) -lnetcdf -lnetcdff +else + SLIBS := +endif + +LIB_MPI := + + +SCC := icc +SFC := ifort +MPICC := mpiicc +MPIFC := mpiifort + +ifeq ($(ICE_COMMDIR), mpi) + FC := $(MPIFC) + CC := $(MPICC) +else + FC := $(SFC) + CC := $(SCC) +endif + +ifeq ($(ICE_THREADED), true) + LDFLAGS += -fopenmp + CFLAGS += -fopenmp + FFLAGS += -fopenmp +endif diff --git a/configuration/scripts/machines/Macros.theia_pgi b/configuration/scripts/machines/Macros.theia_pgi new file mode 100644 index 000000000..83bcc264d --- /dev/null +++ b/configuration/scripts/machines/Macros.theia_pgi @@ -0,0 +1,48 @@ +#============================================================================== +# Makefile macros for theia - intel and openmpi compilers +#============================================================================== + +CPP := cpp +CPPDEFS := -DFORTRANUNDERSCORE -DNO_R16 -DHAVE_F2008_CONTIGUOUS -DLINUX -DCPRINTEL ${ICE_CPPDEFS} +CFLAGS := -c -O2 -xHost + +FIXEDFLAGS := -132 +FREEFLAGS := -FR +FFLAGS := -fp-model precise -convert big_endian -assume byterecl -ftz -traceback -xHost + +FFLAGS_NOOPT:= -O0 + +ifeq ($(ICE_BLDDEBUG), true) +#RG: this looks more like gfortran options: +## FFLAGS += -O0 -g -fcheck=bounds -finit-real=nan -fimplicit-none -ffpe-trap=invalid,zero,overflow + FFLAGS += -O0 -g -check-bounds -implicit-none -fpe3 +else + FFLAGS += -O2 +endif + +FC := mpif90 +MPICC:= +MPIFC:= mpif90 +LD:= $(MPIFC) + +NETCDF_PATH := $(NETCDF) + +ifeq ($(ICE_IOTYPE), netcdf) + NETCDF_PATH := $(shell nc-config --prefix) + INCLDIR := $(INCLDIR) -I$(NETCDF_PATH)/include + LIB_NETCDF := $(NETCDF_PATH)/lib + LIB_PNETCDF := + SLIBS := -L$(LIB_NETCDF) -lnetcdf -lnetcdff +else + SLIBS := +endif + +LIB_MPI := +SCC:= +SFC:= + +ifeq ($(ICE_THREADED), true) + LDFLAGS += -fopenmp + CFLAGS += -fopenmp + FFLAGS += -fopenmp +endif diff --git a/configuration/scripts/machines/Macros.thunder_intel b/configuration/scripts/machines/Macros.thunder_intel index 343b1e99c..581e31909 100644 --- a/configuration/scripts/machines/Macros.thunder_intel +++ b/configuration/scripts/machines/Macros.thunder_intel @@ -3,7 +3,7 @@ #============================================================================== CPP := fpp -CPPDEFS := -DFORTRANUNDERSCORE -DNO_R16 -DHAVE_F2008_CONTIGUOUS -DLINUX -DCPRINTEL ${ICE_CPPDEFS} +CPPDEFS := -DFORTRANUNDERSCORE ${ICE_CPPDEFS} CFLAGS := -c -O2 -fp-model precise -xHost FIXEDFLAGS := -132 @@ -17,16 +17,20 @@ else FFLAGS += -O2 endif +SCC := icc +SFC := ifort +MPICC := icc +MPIFC := ifort + ifeq ($(ICE_COMMDIR), mpi) - FC := mpif90 + FC := $(MPIFC) + CC := $(MPICC) + LDFLAGS += -lmpi else - FC := ifort + FC := $(SFC) + CC := $(SCC) endif - -MPICC:= mpicc - -MPIFC:= mpif90 -LD:= $(MPIFC) +LD:= $(FC) # defined by module #NETCDF_PATH := $(NETCDF) @@ -44,10 +48,6 @@ LIB_PNETCDF := $(PNETCDF_PATH)/lib LIB_MPI := $(IMPILIBDIR) SLIBS := -L$(LIB_NETCDF) -lnetcdf -lnetcdff -SCC:= icc - -SFC:= ifort - ifeq ($(ICE_THREADED), true) LDFLAGS += -qopenmp CFLAGS += -qopenmp diff --git a/configuration/scripts/machines/Macros.travisCI_gnu b/configuration/scripts/machines/Macros.travisCI_gnu index c9f0b19da..66fb30a07 100644 --- a/configuration/scripts/machines/Macros.travisCI_gnu +++ b/configuration/scripts/machines/Macros.travisCI_gnu @@ -3,8 +3,8 @@ #============================================================================== CPP := cpp -CPPDEFS := -DFORTRANUNDERSCORE -DNO_R16 -DHAVE_F2008_CONTIGUOUS -DLINUX -DCPRINTEL ${ICE_CPPDEFS} -CFLAGS := -c -O2 -xHost +CPPDEFS := -DFORTRANUNDERSCORE ${ICE_CPPDEFS} +CFLAGS := -c -O2 FIXEDFLAGS := -ffixed-line-length-132 FREEFLAGS := -ffree-form @@ -17,13 +17,22 @@ else FFLAGS += -O2 endif -FC := mpif90 +SCC := gcc +SFC := gfortran +MPICC := mpicc +MPIFC := mpif90 -MPICC:= - -MPIFC:= mpif90 +ifeq ($(ICE_COMMDIR), mpi) + FC := $(MPIFC) + CC := $(MPICC) +else + FC := $(SFC) + CC := $(SCC) +endif LD:= $(FC) +LIB_MPI := + NETCDF_PATH := $(NETCDF) ifeq ($(ICE_IOTYPE), netcdf) @@ -36,10 +45,6 @@ else SLIBS := endif -LIB_MPI := -SCC:= -SFC:= - ifeq ($(ICE_THREADED), true) LDFLAGS += -fopenmp CFLAGS += -fopenmp diff --git a/configuration/scripts/machines/env.badger_intel b/configuration/scripts/machines/env.badger_intel index a184eba51..3e7bb4f8c 100755 --- a/configuration/scripts/machines/env.badger_intel +++ b/configuration/scripts/machines/env.badger_intel @@ -12,10 +12,14 @@ if ("$inp" != "-nomodules") then #module purge #module load intel #module load openmpi - -setenv NETCDF_PATH /usr/projects/climate/SHARED_CLIMATE/software/conejo/netcdf/3.6.3/intel-13.0.1 -setenv PNETCDF_PATH /usr/projects/climate/SHARED_CLIMATE/software/conejo/parallel-netcdf/1.3.1/intel-13.0.1/openmpi-1.6.3 -#setenv LAPACK_LIBDIR /glade/apps/opt/lapack/3.4.2/intel/12.1.5/lib +module unload hdf5-serial +module unload hdf5-parallel +module unload netcdf-serial +module unload netcdf-h5parallel +module load hdf5-serial +module load netcdf-serial/4.4.0 +module load hdf5-parallel +module load netcdf-h5parallel/4.4.0 #setenv OMP_STACKSIZE 256M #setenv MP_LABELIO yes diff --git a/configuration/scripts/machines/env.brooks_intel b/configuration/scripts/machines/env.brooks_intel new file mode 100644 index 000000000..4a51b5bb1 --- /dev/null +++ b/configuration/scripts/machines/env.brooks_intel @@ -0,0 +1,26 @@ +#!/bin/csh -f + +source /opt/modules/default/init/csh +module load PrgEnv-intel # Intel compiler +module load cray-mpich # MPI (Cray MPICH) +module load cray-netcdf # NetCDF +module load cray-hdf5 # HDF5 + +setenv ICE_MACHINE_ENVNAME brooks +setenv ICE_MACHINE_COMPILER intel +setenv ICE_MACHINE_MAKE make +setenv ICE_MACHINE_WKDIR ~/data/brooks/cice/runs +setenv ICE_MACHINE_INPUTDATA /home/ords/cmdd/cmde/phb001/ +setenv ICE_MACHINE_BASELINE ~/data/brooks/cice/baselines +setenv ICE_MACHINE_SUBMIT "qsub" +setenv ICE_MACHINE_TPNODE 36 +setenv ICE_MACHINE_MAXRUNLENGTH 3 +setenv ICE_MACHINE_ACCT P0000000 +setenv ICE_MACHINE_QUEUE "development" +setenv ICE_MACHINE_BLDTHRDS 4 +setenv ICE_MACHINE_QSTAT "qstat " + +if (-e ~/.cice_proj) then + set account_name = `head -1 ~/.cice_proj` + setenv CICE_ACCT ${account_name} +endif diff --git a/configuration/scripts/machines/env.cesium_intel b/configuration/scripts/machines/env.cesium_intel index b4c72820a..1b8717897 100644 --- a/configuration/scripts/machines/env.cesium_intel +++ b/configuration/scripts/machines/env.cesium_intel @@ -1,11 +1,14 @@ #!/bin/csh -f -#. ssmuse-sh -d /fs/ssm/eccc/mrd/rpn/OCEAN/cncpt-3.1.2 -#source NEMO_compiler.ksh +set ssmuse=/fs/ssm/main/env/20180430/all/bin/ssmuse-csh # package loader +source $ssmuse -d /fs/ssm/main/opt/intelcomp/intelcomp-2016.1.156 # intel compiler +source /fs/ssm/main/opt/intelcomp/intelcomp-2016.1.156/intelcomp_2016.1.156_multi/bin/compilervars.csh intel64 # should be sourced by above domain, but bug in csh script +source $ssmuse -d /fs/ssm/main/opt/openmpi/openmpi-1.6.5/intelcomp-2016.1.156 # openmpi +source $ssmuse -d /fs/ssm/hpco/tmp/eccc/201402/04/intel-2016.1.150 # netcdf (and openmpi) setenv ICE_MACHINE_ENVNAME cesium setenv ICE_MACHINE_COMPILER intel -setenv ICE_MACHINE_MAKE make +setenv ICE_MACHINE_MAKE colormake-short setenv ICE_MACHINE_WKDIR /users/dor/afsg/phb/local/CICEDIRS/CICE_RUNS setenv ICE_MACHINE_INPUTDATA /users/dor/afsg/phb/local/FORCING setenv ICE_MACHINE_BASELINE /users/dor/afsg/phb/local/CICEDIRS/CICE_BASELINE @@ -13,7 +16,8 @@ setenv ICE_MACHINE_SUBMIT "qsub" setenv ICE_MACHINE_TPNODE 36 setenv ICE_MACHINE_ACCT P0000000 setenv ICE_MACHINE_QUEUE "debug" -setenv ICE_MACHINE_BLDTHRDS 1 +setenv ICE_MACHINE_BLDTHRDS 4 +setenv ICE_MACHINE_QSTAT "qstat " if (-e ~/.cice_proj) then set account_name = `head -1 ~/.cice_proj` diff --git a/configuration/scripts/machines/env.cheyenne_intel b/configuration/scripts/machines/env.cheyenne_intel index 5f8044521..e05c51db2 100755 --- a/configuration/scripts/machines/env.cheyenne_intel +++ b/configuration/scripts/machines/env.cheyenne_intel @@ -10,11 +10,11 @@ if ("$inp" != "-nomodules") then source /glade/u/apps/ch/opt/lmod/7.2.1/lmod/7.2.1/init/csh module purge -module load ncarenv/1.1 -module load intel/17.0.1 -module load mpt/2.15f -module load ncarcompilers/0.4.1 -module load pio/2.2.0 +module load ncarenv/1.2 +module load intel/19.0.2 +module load mpt/2.19 +module load ncarcompilers/0.5.0 +module load netcdf/4.6.3 endif diff --git a/configuration/scripts/machines/env.conrad_intel b/configuration/scripts/machines/env.conrad_intel index 2411ec703..f3f2c2529 100755 --- a/configuration/scripts/machines/env.conrad_intel +++ b/configuration/scripts/machines/env.conrad_intel @@ -50,6 +50,8 @@ setenv ICE_MACHINE_BASELINE $WORKDIR/CICE_BASELINE setenv ICE_MACHINE_SUBMIT "qsub " setenv ICE_MACHINE_ACCT P00000000 setenv ICE_MACHINE_QUEUE "debug" -setenv ICE_MACHINE_TPNODE 32 # tasks per node +setenv ICE_MACHINE_TPNODE 32 # tasks per node +setenv ICE_MACHINE_MAXPES 8000 # maximum total pes (tasks * threads) available +setenv ICE_MACHINE_MAXRUNLENGTH 168 # maximum batch wall time limit in hours (integer) setenv ICE_MACHINE_BLDTHRDS 4 setenv ICE_MACHINE_QSTAT "qstat " diff --git a/configuration/scripts/machines/env.cori_intel b/configuration/scripts/machines/env.cori_intel index c9183bffe..703177ef7 100755 --- a/configuration/scripts/machines/env.cori_intel +++ b/configuration/scripts/machines/env.cori_intel @@ -13,30 +13,32 @@ module unload PrgEnv-cray module unload PrgEnv-gnu module unload PrgEnv-intel module unload PrgEnv-pgi -module load PrgEnv-intel/6.0.3 +module load PrgEnv-intel/6.0.5 module unload intel -module load intel/17.0.2.174 +module load intel/19.0.3.199 +module unload gcc +module load gcc/8.2.0 module unload cray-mpich module unload cray-mpich-abi -module load cray-mpich/7.4.4 +module load cray-mpich/7.7.6 module unload cray-hdf5 module unload cray-hdf5-parallel module unload cray-netcdf module unload cray-netcdf-hdf5parallel module unload cray-parallel-netcdf -module load cray-netcdf/4.4.1 -module load cray-hdf5/1.10.0 -module load cray-parallel-netcdf/1.7.0 +module load cray-netcdf/4.6.1.3 module unload cray-libsci module unload craype -module load craype/2.5.7 +module load craype/2.5.18 setenv NETCDF_PATH ${NETCDF_DIR} +setenv OMP_PROC_BIND true +setenv OMP_PLACES threads limit coredumpsize unlimited limit stacksize unlimited @@ -46,11 +48,11 @@ setenv ICE_MACHINE_ENVNAME cori setenv ICE_MACHINE_COMPILER intel setenv ICE_MACHINE_MAKE gmake setenv ICE_MACHINE_WKDIR $SCRATCH/CICE_RUNS -setenv ICE_MACHINE_INPUTDATA /global/homes/t/tcraig/cice_consortium +setenv ICE_MACHINE_INPUTDATA /project/projectdirs/ccsm1/cice-consortium/ setenv ICE_MACHINE_BASELINE $SCRATCH/CICE_BASELINE setenv ICE_MACHINE_SUBMIT "sbatch " setenv ICE_MACHINE_ACCT P00000000 -setenv ICE_MACHINE_QUEUE "debug" +setenv ICE_MACHINE_QUEUE "regular" setenv ICE_MACHINE_TPNODE 32 # tasks per node setenv ICE_MACHINE_BLDTHRDS 4 setenv ICE_MACHINE_QSTAT "squeue --jobs=" diff --git a/configuration/scripts/machines/env.fram_intel b/configuration/scripts/machines/env.fram_intel index 5f71b21d9..8b1326913 100755 --- a/configuration/scripts/machines/env.fram_intel +++ b/configuration/scripts/machines/env.fram_intel @@ -1,19 +1,24 @@ #!/bin/csh -f -#. ssmuse-sh -d /fs/ssm/eccc/mrd/rpn/OCEAN/cncpt-3.1.2 -#source NEMO_compiler.ksh +source /fs/ssm/main/opt/ssmuse/ssmuse-1.7/ssmuse_1.7_all/bin/ssmuse-boot.csh >& /dev/null # package loader +set ssmuse=`which ssmuse-csh` +source $ssmuse -d /fs/ssm/main/opt/intelcomp/intelcomp-2016.1.156 # intel compiler +source /fs/ssm/main/opt/intelcomp/intelcomp-2016.1.156/intelcomp_2016.1.156_multi/bin/compilervars.csh intel64 # should be sourced by above domain, but bug in csh script +source $ssmuse -d /fs/ssm/main/opt/openmpi/openmpi-1.6.5/intelcomp-2016.1.156 # openmpi +source $ssmuse -d /fs/ssm/hpco/tmp/eccc/201402/04/intel-2016.1.150 # netcdf (and openmpi) setenv ICE_MACHINE_ENVNAME fram setenv ICE_MACHINE_COMPILER intel setenv ICE_MACHINE_MAKE make -setenv ICE_MACHINE_WKDIR /home/dormrb01/zephyr4/armn/jfl/local1/Minor_modif4july2018/CICE/tests/CICE_RUNS -setenv ICE_MACHINE_INPUTDATA /users/dor/armn/jfl/local1/CICE6/CICE/configuration/data/gx3Ncar -setenv ICE_MACHINE_BASELINE /home/dormrb01/zephyr4/armn/jfl/local1/Minor_modif4july2018/CICE/tests/CICE_BASELINE +setenv ICE_MACHINE_WKDIR /home/dormrb01/zephyr4/armn/jfl/local1/CICEDIRS/CICE_RUNS +setenv ICE_MACHINE_INPUTDATA /users/dor/armn/jfl/local1/FORCING +setenv ICE_MACHINE_BASELINE /home/dormrb01/zephyr4/armn/jfl/local1/CICEDIRS/CICE_BASELINE setenv ICE_MACHINE_SUBMIT "qsub" setenv ICE_MACHINE_QUEUE "default" setenv ICE_MACHINE_TPNODE 36 setenv ICE_MACHINE_ACCT P0000000 setenv ICE_MACHINE_BLDTHRDS 1 +setenv ICE_MACHINE_QSTAT "qstat " if (-e ~/.cice_proj) then set account_name = `head -1 ~/.cice_proj` diff --git a/configuration/scripts/machines/env.high_Sierra_gnu b/configuration/scripts/machines/env.high_Sierra_gnu new file mode 100755 index 000000000..cabd66944 --- /dev/null +++ b/configuration/scripts/machines/env.high_Sierra_gnu @@ -0,0 +1,15 @@ +#!/bin/csh -f + +setenv ICE_MACHINE_ENVNAME high_Sierra +setenv ICE_MACHINE_COMPILER gnu +setenv ICE_MACHINE_MAKE make +setenv ICE_MACHINE_WKDIR /Volumes/ncep/CICE_RUNS +setenv ICE_MACHINE_INPUTDATA /Volumes/Data/CICE_data +setenv ICE_MACHINE_BASELINE /Volumes/ncep/CICE_BASELINE +setenv ICE_MACHINE_SUBMIT " " +setenv ICE_MACHINE_TPNODE 4 +setenv ICE_MACHINE_ACCT P0000000 +setenv ICE_MACHINE_BLDTHRDS 1 +setenv ICE_MACHINE_QSTAT " " +setenv ICE_MACHINE_QUIETMODE false +setenv ICE_MACHINE_QUEUE " " diff --git a/configuration/scripts/machines/env.millikan_intel b/configuration/scripts/machines/env.millikan_intel new file mode 100644 index 000000000..fc81ac097 --- /dev/null +++ b/configuration/scripts/machines/env.millikan_intel @@ -0,0 +1,25 @@ +#!/bin/csh -f + +set ssmuse=/fs/ssm/main/env/20180430/all/bin/ssmuse-csh # package loader +source $ssmuse -d /fs/ssm/main/opt/intelcomp/intelcomp-2016.1.156 # intel compiler +source /fs/ssm/main/opt/intelcomp/intelcomp-2016.1.156/intelcomp_2016.1.156_multi/bin/compilervars.csh intel64 # should be sourced by above domain, but bug in csh script +source $ssmuse -d /fs/ssm/main/opt/openmpi/openmpi-1.6.5/intelcomp-2016.1.156 # openmpi +source $ssmuse -d /fs/ssm/hpco/tmp/eccc/201402/04/intel-2016.1.150 # netcdf (and openmpi) + +setenv ICE_MACHINE_ENVNAME millikan +setenv ICE_MACHINE_COMPILER intel +setenv ICE_MACHINE_MAKE make +setenv ICE_MACHINE_WKDIR /users/dor/armn/amb/data/local/runs +setenv ICE_MACHINE_INPUTDATA /users/dor/armn/amb/data/local/forcing +setenv ICE_MACHINE_BASELINE /users/dor/armn/amb/data/local/baseline +setenv ICE_MACHINE_SUBMIT "qsub" +setenv ICE_MACHINE_TPNODE 36 +setenv ICE_MACHINE_ACCT P0000000 +setenv ICE_MACHINE_QUEUE "debug" +setenv ICE_MACHINE_BLDTHRDS 4 +setenv ICE_MACHINE_QSTAT "qstat " + +if (-e ~/.cice_proj) then + set account_name = `head -1 ~/.cice_proj` + setenv CICE_ACCT ${account_name} +endif diff --git a/configuration/scripts/machines/env.phase2_intel b/configuration/scripts/machines/env.phase2_intel new file mode 100755 index 000000000..2457d4721 --- /dev/null +++ b/configuration/scripts/machines/env.phase2_intel @@ -0,0 +1,14 @@ +#!/bin/csh -f + +setenv ICE_MACHINE_ENVNAME phase2 +setenv ICE_MACHINE_COMPILER intel +setenv ICE_MACHINE_MAKE gmake +setenv ICE_MACHINE_WKDIR ~/noscrub/CICE_RUNS +setenv ICE_MACHINE_INPUTDATA ~/noscrub/ +setenv ICE_MACHINE_BASELINE ~/noscrub/CICE_BASELINE +setenv ICE_MACHINE_SUBMIT " " +setenv ICE_MACHINE_TPNODE 4 +setenv ICE_MACHINE_QUEUE "default" +setenv ICE_MACHINE_ACCT P0000000 +setenv ICE_MACHINE_BLDTHRDS 1 +setenv ICE_MACHINE_QSTAT "qstat " diff --git a/configuration/scripts/machines/env.phase3_intel b/configuration/scripts/machines/env.phase3_intel new file mode 100755 index 000000000..03aa7083f --- /dev/null +++ b/configuration/scripts/machines/env.phase3_intel @@ -0,0 +1,24 @@ +#!/bin/csh -f --login + +source /etc/profile.d/lmod.csh + +module purge +module load ips/18.0.1.163 +module load impi/18.0.1 +module load NetCDF/4.5.0 +module load ESMF/7_1_0r +module list + +setenv ICE_MACHINE_ENVNAME phase3 +setenv ICE_MACHINE_COMPILER intel +setenv ICE_MACHINE_MAKE gmake +setenv ICE_MACHINE_WKDIR /u/Robert.Grumbine/noscrub/CICE_RUNS +setenv ICE_MACHINE_INPUTDATA /u/Robert.Grumbine/noscrub/ +setenv ICE_MACHINE_BASELINE /u/Robert.Grumbine/noscrub/CICE_BASELINE +setenv ICE_MACHINE_SUBMIT " " +setenv ICE_MACHINE_TPNODE 4 +setenv ICE_MACHINE_QUEUE "default" +setenv ICE_MACHINE_ACCT P0000000 +setenv ICE_MACHINE_BLDTHRDS 1 +setenv ICE_MACHINE_QSTAT "qstat " + diff --git a/configuration/scripts/machines/env.theia_gnu b/configuration/scripts/machines/env.theia_gnu new file mode 100755 index 000000000..974be35e0 --- /dev/null +++ b/configuration/scripts/machines/env.theia_gnu @@ -0,0 +1,15 @@ +#!/bin/csh -f + +setenv ICE_MACHINE_ENVNAME theia +setenv ICE_MACHINE_COMPILER gnu +setenv ICE_MACHINE_MAKE make +setenv ICE_MACHINE_WKDIR /home/Robert.Grumbine/save/CICE_RUNS +setenv ICE_MACHINE_INPUTDATA /home/Robert.Grumbine/save/ +setenv ICE_MACHINE_BASELINE /home/Robert.Grumbine/save/CICE_BASELINE +setenv ICE_MACHINE_SUBMIT " " +setenv ICE_MACHINE_TPNODE 4 +setenv ICE_MACHINE_ACCT P0000000 +setenv ICE_MACHINE_QUEUE "default" +setenv ICE_MACHINE_BLDTHRDS 1 +setenv ICE_MACHINE_QSTAT " " +setenv ICE_MACHINE_QUIETMODE true diff --git a/configuration/scripts/machines/env.theia_intel b/configuration/scripts/machines/env.theia_intel new file mode 100755 index 000000000..0438238d9 --- /dev/null +++ b/configuration/scripts/machines/env.theia_intel @@ -0,0 +1,30 @@ +#!/bin/csh -f + +source /etc/profile.d/modules.csh +#module list +module purge +#module load intel/18.1.163 Works, at least w. nc 4.4.0, +#14.0.2 w nc4.3.0 does not module load intel/18.1.163 +# ok w nc4.3.0: module load intel/16.0.1.150 +# ok w nc4.3.0: module load intel/15.3.187 +module load intel/17.0.5.239 +module load impi +module load esmf +module load hdf5 netcdf/4.3.0 +module load wgrib wgrib2 +#echo renewed modules: +#module list + +setenv ICE_MACHINE_ENVNAME theia +setenv ICE_MACHINE_COMPILER intel +setenv ICE_MACHINE_MAKE make +setenv ICE_MACHINE_WKDIR /home/Robert.Grumbine/save/CICE_RUNS +setenv ICE_MACHINE_INPUTDATA /scratch3/NCEPDEV/marine/save/Robert.Grumbine/ +setenv ICE_MACHINE_BASELINE /home/Robert.Grumbine/save/CICE_BASELINE +setenv ICE_MACHINE_SUBMIT "sbatch" +setenv ICE_MACHINE_TPNODE 4 +setenv ICE_MACHINE_ACCT P0000000 +setenv ICE_MACHINE_QUEUE "batch" +setenv ICE_MACHINE_BLDTHRDS 1 +setenv ICE_MACHINE_QSTAT " " +#setenv ICE_MACHINE_QUIETMODE true diff --git a/configuration/scripts/machines/env.theia_pgi b/configuration/scripts/machines/env.theia_pgi new file mode 100755 index 000000000..2c4229c0e --- /dev/null +++ b/configuration/scripts/machines/env.theia_pgi @@ -0,0 +1,27 @@ +#!/bin/csh -f + +source /etc/profile.d/modules.csh +module list +module purge +module load pgi +module load impi +module load esmf +module load hdf5 netcdf/4.4.0 +module load wgrib wgrib2 +echo renewed modules: +module list + + +setenv ICE_MACHINE_ENVNAME theia +setenv ICE_MACHINE_COMPILER pgi +setenv ICE_MACHINE_MAKE make +setenv ICE_MACHINE_WKDIR /home/Robert.Grumbine/save/CICE_RUNS +setenv ICE_MACHINE_INPUTDATA /scratch3/NCEPDEV/marine/save/Robert.Grumbine/ +setenv ICE_MACHINE_BASELINE /home/Robert.Grumbine/save/CICE_BASELINE +setenv ICE_MACHINE_SUBMIT " " +setenv ICE_MACHINE_TPNODE 4 +setenv ICE_MACHINE_ACCT P0000000 +setenv ICE_MACHINE_QUEUE "default" +setenv ICE_MACHINE_BLDTHRDS 1 +setenv ICE_MACHINE_QSTAT " " +#setenv ICE_MACHINE_QUIETMODE true diff --git a/configuration/scripts/machines/env.travisCI_gnu b/configuration/scripts/machines/env.travisCI_gnu index 687c4ba07..f3c9da26b 100755 --- a/configuration/scripts/machines/env.travisCI_gnu +++ b/configuration/scripts/machines/env.travisCI_gnu @@ -7,7 +7,9 @@ setenv ICE_MACHINE_WKDIR ~/CICE_RUNS setenv ICE_MACHINE_INPUTDATA ~ setenv ICE_MACHINE_BASELINE ~/CICE_BASELINE setenv ICE_MACHINE_SUBMIT " " -setenv ICE_MACHINE_TPNODE 4 +setenv ICE_MACHINE_TPNODE 4 # maximum tasks per node +setenv ICE_MACHINE_MAXPES 4 # maximum total pes (tasks * threads) available +setenv ICE_MACHINE_MAXRUNLENGTH 1 # maximum batch wall time limit in hours (integer) setenv ICE_MACHINE_ACCT P0000000 setenv ICE_MACHINE_QUEUE "default" setenv ICE_MACHINE_BLDTHRDS 1 diff --git a/configuration/scripts/options/set_env.buildclean b/configuration/scripts/options/set_env.buildclean new file mode 100644 index 000000000..932a04eda --- /dev/null +++ b/configuration/scripts/options/set_env.buildclean @@ -0,0 +1 @@ +setenv ICE_CLEANBUILD true diff --git a/configuration/scripts/options/set_env.buildincremental b/configuration/scripts/options/set_env.buildincremental new file mode 100644 index 000000000..3817d449c --- /dev/null +++ b/configuration/scripts/options/set_env.buildincremental @@ -0,0 +1 @@ +setenv ICE_CLEANBUILD false diff --git a/configuration/scripts/options/set_nml.alt04 b/configuration/scripts/options/set_nml.alt04 index 04e58be84..67045b16c 100644 --- a/configuration/scripts/options/set_nml.alt04 +++ b/configuration/scripts/options/set_nml.alt04 @@ -1,6 +1,6 @@ ice_ic = 'default' restart = .false. -bfbflag = .true. +bfbflag = 'reprosum' distribution_type = 'rake' processor_shape = 'slenderX2' distribution_wght = 'block' diff --git a/configuration/scripts/options/set_nml.reprosum b/configuration/scripts/options/set_nml.reprosum new file mode 100644 index 000000000..6526f0d8c --- /dev/null +++ b/configuration/scripts/options/set_nml.reprosum @@ -0,0 +1 @@ +bfbflag = 'reprosum' diff --git a/configuration/scripts/tests/QC/cice.t-test.py b/configuration/scripts/tests/QC/cice.t-test.py index 0954cddf8..86938d8e8 100755 --- a/configuration/scripts/tests/QC/cice.t-test.py +++ b/configuration/scripts/tests/QC/cice.t-test.py @@ -109,12 +109,16 @@ def calc_diff(data_a, data_b): np.all(np.equal(data_d, 0.), axis=0), np.all(data_a < 0.01, axis=0))\ , np.all(data_b < 0.01, axis=0)) mask_array_a = np.zeros_like(data_d) + for x, value in np.ndenumerate(mask_d): i, j = x mask_array_a[:, i, j] = value + del mask_d + data_a = ma.masked_array(data_a, mask=mask_array_a) data_b = ma.masked_array(data_b, mask=mask_array_a) data_d = ma.masked_array(data_d, mask=mask_array_a) + del mask_array_a return data_a, data_b, data_d @@ -145,10 +149,10 @@ def stage_one(data_d, num_files, mean_d, variance_d): r1_den2 = np.zeros_like(mean_d) for i in np.arange(np.size(data_a, axis=0)-1): r1_num = r1_num + (data_d[i, :, :]-mean_nm1_d[:, :])*(data_d[i+1, :, :]-mean_2n_d[:, :]) - r1_den1 = r1_den1 + np.power(data_d[i, :, :]-mean_nm1_d[:, :], 2) + r1_den1 = r1_den1 + np.square(data_d[i, :, :]-mean_nm1_d[:, :]) for i in np.arange(1, np.size(data_a, axis=0)): - r1_den2 = r1_den2 + np.power(data_d[i, :, :] - mean_2n_d[:, :], 2) + r1_den2 = r1_den2 + np.square(data_d[i, :, :] - mean_2n_d[:, :]) r1 = r1_num / np.sqrt(r1_den1*r1_den2) @@ -174,6 +178,13 @@ def stage_one(data_d, num_files, mean_d, variance_d): for x in maenumerate(data_d): min_val = np.min(np.abs(df[x]-df_table)) idx = np.where(np.abs(df[x]-df_table) == min_val) + # Handle the cases where the data point falls exactly half way between + # 2 critical T-values (i.e., idx has more than 1 value in it) + while True: + try: + idx = idx[0] + except: + break t_crit[x] = t_crit_table[idx] # Create an array of Pass / Fail values for each grid cell @@ -183,7 +194,14 @@ def stage_one(data_d, num_files, mean_d, variance_d): # Calculate the mean of the difference mean_d = np.mean(data_d, axis=0) - variance_d = np.sum(np.power(data_d - mean_d, 2)) / (num_files - 1) + + # Loop through each timestep and calculate the square of the difference. + # This is required (instead of just np.square(data_d - mean_d) to reduce + # the memory footprint of the script. + tmp1 = np.zeros_like(data_d) + for i in np.arange(np.shape(data_d)[0]): + tmp1[i,:,:] = np.square(data_d[i,:,:] - mean_d[:,:]) + variance_d = np.sum(tmp1) / float(num_files - 1) n_eff, H1, r1, t_crit = stage_one(data_d, num_files, mean_d, variance_d) @@ -220,6 +238,13 @@ def stage_one(data_d, num_files, mean_d, variance_d): for x in maenumerate(data_d): min_val = np.min(np.abs(r1[x]-r1_table)) idx = np.where(np.abs(r1[x]-r1_table) == min_val) + # Handle the cases where the data point falls exactly half way between + # 2 critical T-values (i.e., idx has more than 1 value in it) + while True: + try: + idx = idx[0] + except: + break t_crit[x] = t_crit_table[idx] # Create an array showing locations of Pass / Fail grid cells @@ -300,8 +325,8 @@ def skill_test(path_a, fname, data_a, data_b, num_files, hemisphere): area_var_a = 0 area_var_b = 0 for t in np.arange(num_files): - area_var_a = area_var_a + np.sum(area_weight*np.power(data_a[t, :, :]-weighted_mean_a, 2)) - area_var_b = area_var_b + np.sum(area_weight*np.power(data_b[t, :, :]-weighted_mean_b, 2)) + area_var_a = area_var_a + np.sum(area_weight*np.square(data_a[t, :, :]-weighted_mean_a)) + area_var_b = area_var_b + np.sum(area_weight*np.square(data_b[t, :, :]-weighted_mean_b)) area_var_a = nonzero_weights / (num_files * nonzero_weights - 1.) * area_var_a area_var_b = nonzero_weights / (num_files * nonzero_weights - 1.) * area_var_b @@ -317,8 +342,8 @@ def skill_test(path_a, fname, data_a, data_b, num_files, hemisphere): weighted_r = combined_cov / (std_a*std_b) - s = np.power((1+weighted_r)*(std_a*std_b)/\ - (area_var_a + area_var_b), 2) + s = np.square((1+weighted_r)*(std_a*std_b)/\ + (area_var_a + area_var_b)) logger.debug('%s Hemisphere skill score = %f', hemisphere, s) @@ -333,48 +358,102 @@ def skill_test(path_a, fname, data_a, data_b, num_files, hemisphere): logger.info('Quadratic Skill Test Failed for %s Hemisphere', hemisphere) return False -def plot_data(data, lat, lon, units): - '''This function plots CICE data and creates a .png file (ice_thickness_map.png).''' +def plot_data(data, lat, lon, units, case, plot_type): + '''This function plots CICE data and creates a .png file.''' try: - logger.info('Creating map of the data (ice_thickness_map.png)') # Load the necessary plotting libraries import matplotlib.pyplot as plt from mpl_toolkits.basemap import Basemap from mpl_toolkits.axes_grid1 import make_axes_locatable except ImportError: logger.warning('Error loading necessary Python modules in plot_data function') + return # Suppress Matplotlib deprecation warnings import warnings warnings.filterwarnings("ignore", category=UserWarning) # Create the figure and axis - fig = plt.figure(figsize=(12, 8)) - ax = fig.add_axes([0.05, 0.08, 0.9, 0.9]) + fig, axes = plt.subplots(nrows=1, ncols=2,figsize=(14, 8)) + # Plot the northern hemisphere data as a scatter plot # Create the basemap, and draw boundaries - m = Basemap(projection='kav7', lon_0=180., resolution='l') - m.drawmapboundary(fill_color='white') + plt.sca(axes[0]) + m = Basemap(projection='npstere', boundinglat=35,lon_0=270, resolution='l') m.drawcoastlines() + m.fillcontinents() m.drawcountries() - # Plot the data as a scatter plot - x, y = m(lon, lat) - sc = m.scatter(x, y, c=data, cmap='jet', lw=0) + if plot_type == 'scatter': + x, y = m(lon,lat) + sc = m.scatter(x, y, c=data, cmap='jet', lw=0, s=4) + else: + # Create new arrays to add 1 additional longitude value to prevent a + # small amount of whitespace around longitude of 0/360 degrees. + lon_cyc = np.zeros((lon.shape[0],lon.shape[1]+1)) + mask = np.zeros((data.shape[0],data.shape[1]+1)) + lat_cyc = np.zeros((lat.shape[0],lat.shape[1]+1)) + + mask[:,0:-1] = data.mask[:,:] + mask[:,-1] = data.mask[:,0] + lon_cyc[:,0:-1] = lon[:,:]; lon_cyc[:,-1] = lon[:,0] + lat_cyc[:,0:-1] = lat[:,:]; lat_cyc[:,-1] = lat[:,0] + + lon1 = np.ma.masked_array(lon_cyc, mask=mask) + lat1 = np.ma.masked_array(lat_cyc, mask=mask) + + d = np.zeros((data.shape[0],data.shape[1]+1)) + d[:,0:-1] = data[:,:] + d[:,-1] = data[:,0] + d1 = np.ma.masked_array(d,mask=mask) + + x, y = m(lon1.data, lat1.data) + + if plot_type == 'contour': + sc = m.contourf(x, y, d1, cmap='jet') + else: # pcolor + sc = m.pcolor(x, y, d1, cmap='jet') + + m.drawparallels(np.arange(-90.,120.,15.),labels=[1,0,0,0]) # draw parallels + m.drawmeridians(np.arange(0.,420.,30.),labels=[1,1,1,1]) # draw meridians + + # Plot the southern hemisphere data as a scatter plot + plt.sca(axes[1]) + m = Basemap(projection='spstere', boundinglat=-45,lon_0=270, resolution='l') + m.drawcoastlines() + m.fillcontinents() + m.drawcountries() - m.drawmeridians(np.arange(0, 360, 60), labels=[0, 0, 0, 1], fontsize=10) - m.drawparallels(np.arange(-90, 90, 30), labels=[1, 0, 0, 0], fontsize=10) + if plot_type == 'scatter': + x, y = m(lon,lat) + sc = m.scatter(x, y, c=data, cmap='jet', lw=0, s=4) + else: + x, y = m(lon1.data, lat1.data) + + # Bandaid for a bug in the version of Basemap used during development + outside = (x <= m.xmin) | (x >= m.xmax) | (y <= m.ymin) | (y >= m.ymax) + tmp = np.ma.masked_where(outside,d1) + + if plot_type == 'contour': + sc = m.contourf(x, y, tmp, cmap='jet') + else: # pcolor + sc = m.pcolor(x, y, tmp, cmap='jet') - plt.title('CICE Ice Thickness') + m.drawparallels(np.arange(-90.,120.,15.),labels=[1,0,0,0]) # draw parallels + m.drawmeridians(np.arange(0.,420.,30.),labels=[1,1,1,1]) # draw meridians - # Create the colorbar and add Pass / Fail labels - divider = make_axes_locatable(ax) - cax = divider.append_axes("bottom", size="5%", pad=0.5) - cb = plt.colorbar(sc, cax=cax, orientation="horizontal", format="%.2f") + plt.suptitle('CICE Mean Ice Thickness\n{}'.format(case), y=0.95) + + # Make some room at the bottom of the figure, and create a colorbar + fig.subplots_adjust(bottom=0.2) + cbar_ax = fig.add_axes([0.11,0.1,0.8,0.05]) + cb = plt.colorbar(sc, cax=cbar_ax, orientation="horizontal", format="%.2f") cb.set_label(units, x=1.0) - plt.savefig('ice_thickness_map.png', dpi=300) + outfile = 'ice_thickness_{}.png'.format(case.replace('\n- ','_minus_')) + logger.info('Creating map of the data ({})'.format(outfile)) + plt.savefig(outfile, dpi=300, bbox_inches='tight') def plot_two_stage_failures(data, lat, lon): '''This function plots each grid cell and whether or not it Passed or Failed @@ -403,7 +482,7 @@ def plot_two_stage_failures(data, lat, lon): ax = fig.add_axes([0.05, 0.08, 0.9, 0.9]) # Create the basemap, and draw boundaries - m = Basemap(projection='kav7', lon_0=180., resolution='l') + m = Basemap(projection='moll', lon_0=0., resolution='l') m.drawmapboundary(fill_color='white') m.drawcoastlines() m.drawcountries() @@ -415,7 +494,7 @@ def plot_two_stage_failures(data, lat, lon): # Plot the data as a scatter plot x, y = m(lon, lat) - sc = m.scatter(x, y, c=int_data, cmap=cm, lw=0, vmin=0, vmax=1) + sc = m.scatter(x, y, c=int_data, cmap=cm, lw=0, vmin=0, vmax=1, s=4) m.drawmeridians(np.arange(0, 360, 60), labels=[0, 0, 0, 1], fontsize=10) m.drawparallels(np.arange(-90, 90, 30), labels=[1, 0, 0, 0], fontsize=10) @@ -457,8 +536,11 @@ def main(): help='Path to the test history (iceh_inst*) files. REQUIRED') parser.add_argument('-v', '--verbose', dest='verbose', help='Print debug output?', \ action='store_true') + parser.add_argument('-pt','--plot_type', dest='plot_type', help='Specify type of plot \ + to create', choices=['scatter','contour','pcolor']) parser.set_defaults(verbose=False) + parser.set_defaults(plot_type='pcolor') # If no arguments are provided, print the help message if len(sys.argv) == 1: @@ -503,6 +585,17 @@ def main(): # If test failed, attempt to create a plot of the failure locations if not PASSED: plot_two_stage_failures(H1_array, t_lat, t_lon) + + # Create plots of mean ice thickness + baseDir = os.path.abspath(args.base_dir).rstrip('history/').rstrip(\ + 'history').split('/')[-1] + testDir = os.path.abspath(args.test_dir).rstrip('history/').rstrip( \ + 'history').split('/')[-1] + plot_data(np.mean(data_base,axis=0), t_lat, t_lon, 'm', baseDir, args.plot_type) + plot_data(np.mean(data_test,axis=0), t_lat, t_lon, 'm', testDir, args.plot_type) + plot_data(np.mean(data_base-data_test,axis=0), t_lat, t_lon, 'm', '{}\n- {}'.\ + format(baseDir,testDir), args.plot_type) + logger.error('Quality Control Test FAILED') sys.exit(-1) @@ -534,6 +627,16 @@ def main(): PASSED_SKILL = PASSED_NH and PASSED_SH + # Plot the ice thickness data for the base and test cases + baseDir = os.path.abspath(args.base_dir).rstrip('history/').rstrip( \ + 'history').split('/')[-1] + testDir = os.path.abspath(args.test_dir).rstrip('history/').rstrip( \ + 'history').split('/')[-1] + plot_data(np.mean(data_base,axis=0), t_lat, t_lon, 'm', baseDir, args.plot_type) + plot_data(np.mean(data_test,axis=0), t_lat, t_lon, 'm', testDir, args.plot_type) + plot_data(np.mean(data_base-data_test,axis=0), t_lat, t_lon, 'm', '{}\n- {}'.\ + format(baseDir,testDir), args.plot_type) + logger.info('') if not PASSED_SKILL: logger.error('Quality Control Test FAILED') diff --git a/configuration/scripts/tests/QC/compare_qc_cases.csh b/configuration/scripts/tests/QC/compare_qc_cases.csh index 90086458f..8ce2b3d41 100755 --- a/configuration/scripts/tests/QC/compare_qc_cases.csh +++ b/configuration/scripts/tests/QC/compare_qc_cases.csh @@ -28,7 +28,7 @@ echo "===== Running QC tests and writing output to $QC_DIR/validate_qc.log ===== echo "Running QC test on base and bfb directories." echo "Expected result: PASSED" ./configuration/scripts/tests/QC/cice.t-test.py $base_histdir $bfb_histdir >& $QC_DIR/validate_qc.log -set case1="$?" +set case1="$status" if ($case1 == $QC_SUCCESS) then echo "Result: PASSED" else @@ -39,7 +39,7 @@ echo "-----------------------------------------------" echo "Running QC test on base and non-bfb directories." echo "Expected result: PASSED" ./configuration/scripts/tests/QC/cice.t-test.py $base_histdir $nonbfb_histdir >>& $QC_DIR/validate_qc.log -set case2="$?" +set case2="$status" if ($case2 == $QC_SUCCESS) then echo "Result: PASSED" else @@ -50,7 +50,7 @@ echo "-----------------------------------------------" echo "Running QC test on base and climate-changing directories." echo "Expected result: FAILED" ./configuration/scripts/tests/QC/cice.t-test.py $base_histdir $fail_histdir >>& $QC_DIR/validate_qc.log -set case3="$?" +set case3="$status" if ($case3 == $QC_SUCCESS) then echo "Result: PASSED" else diff --git a/configuration/scripts/tests/base_suite.ts b/configuration/scripts/tests/base_suite.ts index 19d073a97..413047e7a 100644 --- a/configuration/scripts/tests/base_suite.ts +++ b/configuration/scripts/tests/base_suite.ts @@ -22,13 +22,13 @@ restart gx3 8x2 alt02,debug,short restart gx3 4x2 alt03,debug,short smoke gx3 4x4 alt04,debug,short smoke gx3 4x4 alt05,debug,short -restart gbox128 4x2 none -restart gbox128 4x2 boxdyn -restart gbox128 4x2 boxdyn,debug +restart gbox128 4x2 short +restart gbox128 4x2 boxdyn,short +restart gbox128 4x2 boxdyn,short,debug restart gbox128 2x2 boxadv,short smoke gbox128 2x2 boxadv,short,debug -restart gbox128 4x4 boxrestore -smoke gbox128 4x4 boxrestore,debug +restart gbox128 4x4 boxrestore,short +smoke gbox128 4x4 boxrestore,short,debug restart gbox80 1x1 box2001 smoke gbox80 1x1 boxslotcyl smoke gx3 8x2 bgcz diff --git a/configuration/scripts/tests/baseline.script b/configuration/scripts/tests/baseline.script index f9bec5e4d..9e78027a1 100644 --- a/configuration/scripts/tests/baseline.script +++ b/configuration/scripts/tests/baseline.script @@ -20,11 +20,8 @@ endif # Baseline comparing run if (${ICE_BASECOM} != ${ICE_SPVAL}) then - set test_file = `ls -t1 ${ICE_RUNDIR}/restart | head -1` - set test_data = ${ICE_RUNDIR}/restart/${test_file} - - set baseline_dir = ${ICE_BASELINE}/${ICE_BASECOM}/${ICE_TESTNAME}/restart - set baseline_data = ${baseline_dir}/${test_file} + set test_dir = ${ICE_RUNDIR}/restart + set base_dir = ${ICE_BASELINE}/${ICE_BASECOM}/${ICE_TESTNAME}/restart set baseline_log = `ls -1t ${ICE_BASELINE}/${ICE_BASECOM}/${ICE_TESTNAME}/cice.runlog* | head -1` set btimeloop = -1 @@ -41,20 +38,20 @@ if (${ICE_BASECOM} != ${ICE_SPVAL}) then echo "" echo "Regression Compare Mode:" - echo "Performing binary comparison between files" - echo "baseline: ${baseline_data}" - echo "test: ${test_data}" - if (-e ${baseline_data} ) then - if ( { cmp -s ${test_data} ${baseline_data} } ) then - echo "PASS ${ICE_TESTNAME} compare ${ICE_BASECOM} ${btimeloop} ${bdynamics} ${bcolumn}" >> ${ICE_CASEDIR}/test_output - echo "Regression baseline and test dataset are identical" - else - echo "FAIL ${ICE_TESTNAME} compare ${ICE_BASECOM} ${btimeloop} ${bdynamics} ${bcolumn} different-data" >> ${ICE_CASEDIR}/test_output - echo "Regression baseline and test dataset are different" - endif - else + echo "base_dir: ${base_dir}" + echo "test_dir: ${test_dir}" + + ${ICE_CASEDIR}/casescripts/comparebfb.csh ${base_dir} ${test_dir} + set bfbstatus = $status + if ( ${bfbstatus} == 0 ) then + echo "PASS ${ICE_TESTNAME} compare ${ICE_BASECOM} ${btimeloop} ${bdynamics} ${bcolumn}" >> ${ICE_CASEDIR}/test_output + echo "Regression baseline and test dataset are identical" + else if ( ${bfbstatus} == 2 ) then echo "MISS ${ICE_TESTNAME} compare ${ICE_BASECOM} ${btimeloop} ${bdynamics} ${bcolumn} missing-data" >> ${ICE_CASEDIR}/test_output - echo "Baseline file does not exist" + echo "Missing data" + else + echo "FAIL ${ICE_TESTNAME} compare ${ICE_BASECOM} ${btimeloop} ${bdynamics} ${bcolumn} different-data" >> ${ICE_CASEDIR}/test_output + echo "Regression baseline and test dataset are different" endif endif @@ -64,24 +61,39 @@ endif if (${ICE_BFBCOMP} != ${ICE_SPVAL}) then - set test_file = `ls -t1 ${ICE_RUNDIR}/restart | head -1` - if (${test_file} != "") then - set test_data = ${ICE_RUNDIR}/restart/${test_file} - else - set test_data = "NoThInG__Here" - endif - set comp_file = `ls -t1 ${ICE_RUNDIR}/../${ICE_BFBCOMP}/restart | head -1` - if (${comp_file} != "") then - set comp_data = ${ICE_RUNDIR}/../${ICE_BFBCOMP}/restart/${comp_file} + if (${ICE_TEST} == "logbfb") then + set test_file = `ls -1t ${ICE_RUNDIR}/cice.runlog* | head -1` + set base_file = `ls -1t ${ICE_RUNDIR}/../${ICE_BFBCOMP}.${ICE_TESTID}/cice.runlog* | head -1` + + echo "" + echo "bfb Log Compare Mode:" + echo "base_file: ${base_file}" + echo "test_file: ${test_file}" + + ${ICE_CASEDIR}/casescripts/comparelog.csh ${base_file} ${test_file} + set bfbstatus = $status else - set comp_data = "NoThInG__Here" + set test_dir = ${ICE_RUNDIR}/restart + set base_dir = ${ICE_RUNDIR}/../${ICE_BFBCOMP}.${ICE_TESTID}/restart + + echo "" + echo "bfb Compare Mode:" + echo "base_dir: ${base_dir}" + echo "test_dir: ${test_dir}" + + ${ICE_CASEDIR}/casescripts/comparebfb.csh ${base_dir} ${test_dir} + set bfbstatus = $status endif - if (-e ${comp_data} && -e ${test_data}) then - ${ICE_CASEDIR}/casescripts/comparebfb.csh $test_data $comp_data - else - echo "MISS ${ICE_TESTNAME} bfbcomp missing-data" >> ${ICE_CASEDIR}/test_output + if (${bfbstatus} == 0) then + echo "PASS ${ICE_TESTNAME} bfbcomp ${ICE_BFBCOMP}" >> ${ICE_CASEDIR}/test_output + echo "bfb baseline and test dataset are identical" + else if (${bfbstatus} == 2) then + echo "MISS ${ICE_TESTNAME} bfbcomp ${ICE_BFBCOMP} missing-data" >> ${ICE_CASEDIR}/test_output echo "Missing data" + else + echo "FAIL ${ICE_TESTNAME} bfbcomp ${ICE_BFBCOMP} different-data" >> ${ICE_CASEDIR}/test_output + echo "bfbcomp and test dataset are different" endif endif diff --git a/configuration/scripts/tests/comparebfb.csh b/configuration/scripts/tests/comparebfb.csh index 44e701fd1..046f40651 100755 --- a/configuration/scripts/tests/comparebfb.csh +++ b/configuration/scripts/tests/comparebfb.csh @@ -1,206 +1,95 @@ #!/bin/csh -f -# Compare the restart files or log files between the 2 cases +# Compare the binary files #----------------------------------------------------------- -# usage: comparebfb.script base_dir test_dir +# usage: comparebfb.csh base_file test_file +# does binary diff of two files +# usage: comparebfb.csh base_dir test_dir +# looks for base_iced and iced binary files for comparison +# usage: comparebfb.csh base_dir +# looks for iced binary files in both directories for comparison # -# base_dir: directory of either restart files or log files for the base -# simulation. -# test_dir: directory of either restart files or log files for the test -# simulation. -# -# To run a `restart` test, only 1 directory is passed +# Return Codes (depends on quality of error checking) +# 0 = pass +# 1 = fail +# 2 = missing data +# 9 = error + +set restart = 0 +set filearg = 0 -# Check to see if this is a restart case (i.e., only 1 directory is passed) if ( $#argv == 1 ) then + # One Dir was passed, assume it's for restart test set restart = 1 - set compare = 0 set base_dir = $argv[1] + set test_dir = $argv[1] else if ( $#argv == 2 ) then if ( -f $argv[1] ) then - # Files were passed - set compare = 1 - set restart = 0 - set base_file = $argv[1] - set test_file = $argv[2] + # Two Files were passed + set filearg = 1 + set base_data = $argv[1] + set test_data = $argv[2] else - # Directories were passed - set compare = 0 - set restart = 0 + # Two Directories were passed, assume it's to compare restart files set base_dir = $argv[1] set test_dir = $argv[2] endif else echo "Error in ${0}" + echo "Usage: ${0} " + echo " does binary diff of two files" + echo "Usage: ${0} " + echo " looks for base_iced and iced binary files for comparison" echo "Usage: ${0} " - echo " only included for non-restart tests" + echo " looks for iced binary files in both directories for comparison" + exit 9 endif -if ( $compare == 0 ) then - # Check to see if the base directory includes runlogs, or restart files - set numfiles = `find $base_dir -maxdepth 1 -name 'cice.runlog*' | wc -l` - if ( $numfiles > 0 ) then - # Compare log files - set logcmp = 1 - else - # Compare restart files - set numfiles = `find $base_dir -maxdepth 1 -name '*.nc' | wc -l` - if ( $numfiles > 0 ) then - # Compare netcdf files - set binary = 0 +set failure = 0 + +if ($filearg == 1) then + echo "base_data: $base_data" + echo "test_data: $test_data" + if ( -e ${base_data} && -e ${test_data}) then + if ( { cmp -s ${base_data} ${test_data} } ) then + echo " compare OK" else - # Compare binary files - set binary = 1 + set failure = 1 + echo " compare FAIL" endif - set logcmp = 0 - endif -else - set logcmp = 0 - set binary = 0 -endif - -if ( $logcmp == 1 ) then - # Compare the diagnostic output in the log files - # --------------------- - echo "Performing comparison between log files:" - echo "" - echo "" - - if ( $restart == 1 ) then - # This is a restart test. Grab the base and test log files from the same directory - set base_log = `ls -t1 $base_dir/cice.runlog* | head -2 | tail -1` - set test_log = `ls -t1 $base_dir/cice.runlog* | head -1` else - # This is a test comparing 2 separate directories - set base_log = `ls -t1 $base_dir/cice.runlog* | head -1` - set test_log = `ls -t1 $test_dir/cice.runlog* | head -1` + set failure = 2 + echo " missing data" endif - echo "base: $base_log" - echo "test: $test_log" - - set base_out = `tac $base_log | awk 'BEGIN{found=1;} /istep1:/ {found=0} {if (found) print}' | tac | grep '= ' | grep -v 'min, max, sum' | tr '\n' ','` - set test_out = `tac $test_log | awk 'BEGIN{found=1;} /istep1:/ {found=0} {if (found) print}' | tac | grep '= ' | grep -v 'min, max, sum'` - # Ensure that there is diagnostic output - if ( ${#base_out} < 10 || ${#test_out} < 10 ) then - echo "No diagnostic output for comparison" - exit 1 - endif - - # Replace all asterisks (*) with a period (!) as workaround for errors - # encountered looping through words with asterisks in csh - set base_out = `echo "$base_out" | sed 's/\*/./g'` - set test_out = `echo "$test_out" | sed 's/\*/./g'` - - set failure = 0 - # Loop through each line of diagnostic output and check for differences - foreach line ( "`echo '$base_out' | tr ',' '\n'`" ) - foreach word ( $line ) - if ( "$word" != "$test_out[1]" ) then - # Print the difference to the log - echo "Difference in:" - echo "$line" - echo "Base value: $word" - echo "Test value: $test_out[1]" - set failure = 1 - endif - shift test_out - end - end - - if ( $failure == 0 ) then - exit 0 - #echo "PASS ${ICE_TESTNAME} log " >> ${ICE_CASEDIR}/test_output - else - exit 1 - #echo "FAIL ${ICE_TESTNAME} log " >> ${ICE_CASEDIR}/test_output - endif -else if ( $compare == 0 ) then - echo "Exact Restart Comparison Mode:" - if ( $binary == 1 ) then - if ( $restart == 1 ) then - # Restart case. Compare restart files (iced.*) to base files (base_iced.*) - set end_date = `ls -t1 $base_dir | head -1 | awk -F'.' '{print $NF}'` - set failure = 0 - foreach file (${base_dir}/base_*${end_date}) - echo "Performing binary comparison between files:" - set base_data = `echo $file | awk -F'/' '{print $NF}'` - set test_data = `echo $file | awk -F'/' '{print $NF}' | cut -c6-` - echo "base: $base_data" - echo "test: $test_data" - cmp -s $base_data $test_data - if ( $? == 1 ) then - set failure = 1 - echo "Failure in data comparison" - break - endif - end - else - # bfbcmp case. Compare restart files (iced.*) between the 2 directories - set end_date = `ls -t1 $base_dir | head -1 | awk -F'.' '{print $NF}'` - set failure = 0 - foreach file (${base_dir}/*${end_date}) - echo "Performing binary comparison between files:" - set base_data = $base_dir/`echo $file | awk -F'/' '{print $NF}'` - set test_data = $test_dir/`echo $file | awk -F'/' '{print $NF}'` - echo "base: $base_data" - echo "test: $test_data" - cmp -s $base_data $test_data - if ( $? == 1 ) then - set failure = 1 - echo "Failure in data comparison" - break - endif - end - endif - if ( $failure == 0 ) then - exit 0 - #echo "PASS ${ICE_TESTNAME} test " >> ${ICE_CASEDIR}/test_output - else - exit 1 - #echo "FAIL ${ICE_TESTNAME} test " >> ${ICE_CASEDIR}/test_output - endif - else - echo "Performing binary comparison between files:" - if ( $restart == 1 ) then - # This is a restart test. Grab the restart files from the same directory - set base_file = $base_dir/`ls -t1 $base_dir | head -2 | tail -1` - set test_file = $base_dir/`ls -t1 $base_dir | head -1` +else + set end_date = `ls -t1 $test_dir | head -1 | sed 's|^.*\([0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]-[0-9][0-9][0-9][0-9][0-9]\).*|\1|'` + # set nonomatch so that if foreach does not find anything it does not end the script + set nonomatch + foreach test_data (${test_dir}/iced*${end_date}*) + set test_file = "${test_data:t}" + if ($restart == 1) then + set base_data = ${base_dir}/base_${test_file} else - # This is a test comparing 2 separate directories - set base_file = $base_dir/`ls -t1 $base_dir | head -1` - set test_file = $test_dir/`ls -t1 $test_dir | head -1` + set base_data = ${base_dir}/${test_file} endif - echo "base: $base_file" - echo "test: $test_file" - if ( { cmp -s $test_file $base_file } ) then - exit 0 - #echo "PASS ${ICE_TESTNAME} test " >> ${ICE_CASEDIR}/test_output + echo "base_data: ${base_data}" + echo "test_data: ${test_data}" + if ( -e ${base_data} && -e ${test_data}) then + if ( { cmp -s ${base_data} ${test_data} } ) then + echo " compare OK" + else + set failure = 1 + echo " compare FAIL" + endif else - exit 1 - #echo "FAIL ${ICE_TESTNAME} test " >> ${ICE_CASEDIR}/test_output + if ($failure == 0) set failure = 2 + echo " missing data" endif - endif -else if ( $compare == 1 ) then - # Compare restart files for differing cases (bfbcomp) - echo "" - echo "BFB Compare Mode:" - if ( "$base_file" =~ *.nc && "$test_file" =~ *.nc ) then - echo "Comparing netcdf files" - else if ( "$base_file" !=~ *.nc && "$test_file" !=~ *.nc ) then - echo "Comparing binary files" - else - echo "${0}: A comparison cannot be performed between netcdf and binary files." - exit 1 - endif - echo "base: $base_file" - echo "test: $test_file" - if ( { cmp -s $test_file $base_file } ) then - exit 0 - else - exit 1 - endif -else - echo "${0}: script failure" -endif # if logcmp + end + unset nonomatch +endif + +exit ${failure} + diff --git a/configuration/scripts/tests/comparelog.csh b/configuration/scripts/tests/comparelog.csh new file mode 100755 index 000000000..8c1ff3a3c --- /dev/null +++ b/configuration/scripts/tests/comparelog.csh @@ -0,0 +1,70 @@ +#!/bin/csh -f + +# Compare prognostic output in two log files +#----------------------------------------------------------- + +# usage: comparelog.csh base_file test_file +# does diff of two files +# +# Return Codes (depends on quality of error checking) +# 0 = pass +# 1 = fail +# 2 = missing data +# 9 = error + +set filearg = 0 +if ( $#argv == 2 ) then + set filearg = 1 + set base_data = $argv[1] + set test_data = $argv[2] +else + echo "Error in ${0}" + echo "Usage: ${0} " + echo " does diff of two files" + exit 9 +endif + +set failure = 0 +set base_out = "comparelog_base_out_file.log" +set test_out = "comparelog_test_out_file.log" + +if ($filearg == 1) then + echo "base_data: $base_data" + echo "test_data: $test_data" + if ( -f ${base_data} && -f ${test_data}) then + if (${base_data} == ${test_data}) then + set failure = 9 + echo " input data are same" + else + + touch ${base_out} + cat ${base_data} | grep -A 99999999 istep1: | grep -e istep1: -e = | grep -iv "min, max, sum" >&! ${base_out} + touch ${test_out} + cat ${test_data} | grep -A 99999999 istep1: | grep -e istep1: -e = | grep -iv "min, max, sum" >&! ${test_out} + + set basenum = `cat ${base_out} | wc -l` + set testnum = `cat ${base_out} | wc -l` + set filediff = `diff -w ${base_out} ${test_out} | wc -l` + + if (${basenum} > 0 && ${testnum} > 0) then + if ($filediff == 0) then + echo " compare OK" + else + set failure = 1 + echo " compare FAIL" + endif + else + set failure = 9 + echo " compare on no output" + endif + rm ${base_out} + rm ${test_out} + endif + else + set failure = 2 + echo " missing data" + endif +endif + +exit ${failure} + diff --git a/configuration/scripts/tests/decomp_suite.ts b/configuration/scripts/tests/decomp_suite.ts index a17ac0c3f..1e1ae3112 100644 --- a/configuration/scripts/tests/decomp_suite.ts +++ b/configuration/scripts/tests/decomp_suite.ts @@ -1,6 +1,8 @@ # Test Grid PEs Sets BFB-compare restart gx3 4x2x25x29x4 dslenderX2 decomp gx3 4x2x25x29x5 +sleep 30 +restart gx3 1x1x50x58x4 droundrobin,thread restart_gx3_4x2x25x29x4_dslenderX2 restart gx3 4x1x25x116x1 dslenderX1,thread restart_gx3_4x2x25x29x4_dslenderX2 restart gx3 6x2x4x29x18 dspacecurve restart_gx3_4x2x25x29x4_dslenderX2 restart gx3 8x2x8x10x20 droundrobin restart_gx3_4x2x25x29x4_dslenderX2 @@ -10,5 +12,4 @@ restart gx3 1x20x5x29x80 dsectrobin,short restart_gx3_4x2x25x29x4_ restart gx3 16x2x5x10x20 drakeX2 restart_gx3_4x2x25x29x4_dslenderX2 restart gx3 8x2x8x10x20 droundrobin,maskhalo restart_gx3_4x2x25x29x4_dslenderX2 restart gx3 1x4x25x29x16 droundrobin restart_gx3_4x2x25x29x4_dslenderX2 -restart gx3 1x1x50x58x4 droundrobin,thread restart_gx3_4x2x25x29x4_dslenderX2 diff --git a/configuration/scripts/tests/first_suite.ts b/configuration/scripts/tests/first_suite.ts new file mode 100644 index 000000000..b06ee7e0b --- /dev/null +++ b/configuration/scripts/tests/first_suite.ts @@ -0,0 +1,5 @@ +# Test Grid PEs Sets BFB-compare +smoke gx3 8x2 diag1,run5day +restart gx3 4x2x25x29x4 dslenderX2 +logbfb gx3 4x2x25x29x4 dslenderX2,diag1,reprosum +smoke gx3 1x2 run2day diff --git a/configuration/scripts/tests/poll_queue.csh b/configuration/scripts/tests/poll_queue.csh index 6a9481645..0272a1aa9 100755 --- a/configuration/scripts/tests/poll_queue.csh +++ b/configuration/scripts/tests/poll_queue.csh @@ -1,38 +1,30 @@ #!/bin/csh -f -# Parse the job IDs from suite.log. This should work for PBS, Slurm, or IBM LFS but needs -# to be thoroughly tested (so far only tested on PBS) +if (-e poll_queue.env) then + source poll_queue.env +endif -if (-e suite.jobs) rm -f suite.jobs -set job_id = 0 -foreach line ( "`cat suite.log`" ) - if ( $job_id == 1 ) then - set job_id = 0 - if ( "$line" != " " ) then - # Grep the job number - echo "$line" | grep -oP "\d+" | sort -n | tail -1 >> suite.jobs - endif - else - if ( "$line" =~ *'COMPILE SUCCESSFUL'* ) then - set job_id = 1 - endif - if ( "$line" =~ *'ciceexe'* ) then - set job_id = 1 - endif - endif -end +# Parse the job IDs from suite.jobs. This should work for PBS, Slurm, or IBM LFS but needs +# to be thoroughly tested (so far only tested on PBS) # Wait for all jobs to finish -foreach job ("`cat suite.jobs`") - while (1) - ${ICE_MACHINE_QSTAT} $job >&/dev/null - if ($? != 0) then - echo "Job $job completed" - break - endif - echo "Waiting for $job to complete" - sleep 60 # Sleep for 1 minute, so as not to overwhelm the queue manager - end +foreach line ("`cat suite.jobs`") + set job = `echo "$line" | sed 's|^[^0-9]*\([0-9]*\).*$|\1|g'` + set qstatjob = 1 + if (${job} =~ [0-9]*) then + while ($qstatjob) + ${ICE_MACHINE_QSTAT} $job >&/dev/null + set qstatus = $status +# echo $job $qstatus + if ($qstatus != 0) then + echo "Job $job completed" + set qstatjob = 0 + else + echo "Waiting for $job to complete" + sleep 60 # Sleep for 1 minute, so as not to overwhelm the queue manager + endif +# echo $qstatjob + end + endif end -#rm suite.jobs # Delete the list of job IDs diff --git a/configuration/scripts/tests/report_results.csh b/configuration/scripts/tests/report_results.csh index e3d1cef44..044198ef6 100755 --- a/configuration/scripts/tests/report_results.csh +++ b/configuration/scripts/tests/report_results.csh @@ -118,12 +118,12 @@ if ( $fbuild != "" || $frun != "" || $ftest != "" ) then set frun = `grep " ${case} " results.log | grep " run" | cut -c 1-4` set ftest = `grep " ${case} " results.log | grep " test" | cut -c 1-4` set fregr = `grep " ${case} " results.log | grep " compare" | cut -c 1-4` - set fcomp = `grep " ${case} " results.log | grep " bfbcomp" | cut -c 1-4` + set fcomp = `grep " ${case} bfbcomp " results.log | cut -c 1-4` # if (${ftest} == "PASS") set frun = "PASS" # if (${frun} == "PASS") set fbuild = "PASS" set vregr = `grep " ${case} " results.log | grep " compare" | cut -d " " -f 4 | sed 's/\./ /g' ` - set vcomp = `grep " ${case} " results.log | grep " bfbcomp" | cut -d " " -f 4` + set vcomp = `grep " ${case} bfbcomp " results.log | cut -d " " -f 4` set vtime1 = `grep " ${case} " results.log | grep " run" | cut -d " " -f 4` set vtime2 = `grep " ${case} " results.log | grep " run" | cut -d " " -f 5` diff --git a/configuration/scripts/tests/reprosum_suite.ts b/configuration/scripts/tests/reprosum_suite.ts new file mode 100644 index 000000000..34cf51a80 --- /dev/null +++ b/configuration/scripts/tests/reprosum_suite.ts @@ -0,0 +1,11 @@ +# Test Grid PEs Sets BFB-compare +logbfb gx3 4x2x25x29x4 dslenderX2,diag1,reprosum +#logbfb gx3 4x2x25x29x4 dslenderX2,diag1 +sleep 60 +logbfb gx3 1x1x50x58x4 droundrobin,diag1,thread,maskhalo,reprosum logbfb_gx3_4x2x25x29x4_diag1_dslenderX2_reprosum +logbfb gx3 4x1x25x116x1 dslenderX1,diag1,thread,maskhalo,reprosum logbfb_gx3_4x2x25x29x4_diag1_dslenderX2_reprosum +logbfb gx3 1x20x5x29x80 dsectrobin,diag1,short,reprosum logbfb_gx3_4x2x25x29x4_diag1_dslenderX2_reprosum +logbfb gx3 8x2x8x10x20 droundrobin,diag1,reprosum logbfb_gx3_4x2x25x29x4_diag1_dslenderX2_reprosum +logbfb gx3 6x2x50x58x1 droundrobin,diag1,reprosum logbfb_gx3_4x2x25x29x4_diag1_dslenderX2_reprosum +logbfb gx3 6x2x4x29x18 dspacecurve,diag1,maskhalo,reprosum logbfb_gx3_4x2x25x29x4_diag1_dslenderX2_reprosum +#logbfb gx3 8x2x8x10x20 droundrobin,diag1 logbfb_gx3_4x2x25x29x4_diag1_dslenderX2 diff --git a/configuration/scripts/tests/rgvers_suite.ts b/configuration/scripts/tests/rgvers_suite.ts new file mode 100644 index 000000000..dc85613db --- /dev/null +++ b/configuration/scripts/tests/rgvers_suite.ts @@ -0,0 +1,18 @@ +# Test Grid PEs Sets BFB-compare +#restart gx1 4x4 droundrobin,short +#restart gx1 8x4 droundrobin,short +#restart gx1 12x4 droundrobin,short +#restart gx1 16x4 droundrobin,short +#restart gx1 20x4 droundrobin,short +#restart gx1 24x4 droundrobin,short +#restart gx1 28x4 droundrobin,short +#restart gx1 32x4 droundrobin,short +restart gx1 32x2 droundrobin,short +restart gx1 32x3 droundrobin,short +restart gx1 32x1 droundrobin,short + +restart gx1 33x4 droundrobin,short +restart gx1 34x4 droundrobin,short +restart gx1 35x4 droundrobin,short +restart gx1 36x4 droundrobin,short +#restart gx1 40x4 droundrobin,short diff --git a/configuration/scripts/tests/test_decomp.script b/configuration/scripts/tests/test_decomp.script index b58dbcb2b..70d7cd24a 100644 --- a/configuration/scripts/tests/test_decomp.script +++ b/configuration/scripts/tests/test_decomp.script @@ -17,14 +17,14 @@ foreach decomp (${decomps}) end cp ice_in ice_in.0 -set base_data = "" +set base_dir = "" foreach decomp (${decomps}) ${ICE_CASEDIR}/casescripts/parse_namelist.sh ice_in ${ICE_CASEDIR}/casescripts/set_nml.d${decomp} cp ice_in ice_in.${decomp} ./cice.run - set res="$?" + set res="$status" set grade = FAIL if ( $res == 0 ) then @@ -51,24 +51,34 @@ foreach decomp (${decomps}) if (${tdynamics} == "") set tdynamics = -1 if (${tcolumn} == "") set tcolumn = -1 - set test_file = `ls -t1 ${ICE_RUNDIR}/restart | head -1` - set test_data = ${ICE_RUNDIR}/restart/${test_file} - mv -f ${test_data} ${test_data}.${decomp} - set test_file = ${test_file}.${decomp} - set test_data = ${test_data}.${decomp} + set rest_dir = ${ICE_RUNDIR}/restart + mv -f ${rest_dir} ${rest_dir}.${decomp} + mkdir -p ${rest_dir} + set rest_dir = ${rest_dir}.${decomp} echo "$grade ${ICE_TESTNAME}_${decomp} run ${ttimeloop} ${tdynamics} ${tcolumn}" >> ${ICE_CASEDIR}/test_output echo "$grade ${ICE_TESTNAME}_${decomp} test" >> ${ICE_CASEDIR}/test_output + set test_dir = ${rest_dir} # bfb compare section - if (${base_data} == "") then + if (${base_dir} == "") then # First run is the base data set base_case = ${ICE_TESTNAME}_${decomp} - set base_data = ${test_data} + set base_dir = ${test_dir} else - set grade = FAIL - if ( { cmp -s $test_data $base_data } ) then + echo "" + echo "bfb Compare Mode:" + echo "base_dir: ${base_dir}" + echo "test_dir: ${test_dir}" + + ${ICE_CASEDIR}/casescripts/comparebfb.csh ${base_dir} ${test_dir} + set bfbstatus = $status + if ($bfbstatus == 0) then set grade = PASS + echo "bfb baseline and test dataset are identical" + else + set grade = FAIL + echo "bfbcomp and test dataset are different" endif echo "$grade ${ICE_TESTNAME}_${decomp} bfbcomp ${base_case}" >> ${ICE_CASEDIR}/test_output endif @@ -76,8 +86,7 @@ foreach decomp (${decomps}) # compare section if (${ICE_BASECOM} != ${ICE_SPVAL}) then - set baseline_dir = ${ICE_BASELINE}/${ICE_BASECOM}/${ICE_TESTNAME}/restart - set baseline_data = ${baseline_dir}/${test_file} + set baseline_dir = ${ICE_BASELINE}/${ICE_BASECOM}/${ICE_TESTNAME}/restart.${decomp} set baseline_log = `ls -1t ${ICE_BASELINE}/${ICE_BASECOM}/${ICE_TESTNAME}/cice.runlog*.${decomp} | head -1` set btimeloop = -1 @@ -94,19 +103,19 @@ foreach decomp (${decomps}) echo "" echo "Regression Compare Mode:" - echo "Performing binary comparison between files" - echo "baseline: ${baseline_data}" - echo "test: ${test_data}" - if (-e ${baseline_data} ) then - if ( { cmp -s ${test_data} ${baseline_data} } ) then - echo "PASS ${ICE_TESTNAME}_$decomp compare ${ICE_BASECOM} ${btimeloop} ${bdynamics} ${bcolumn}" >> ${ICE_CASEDIR}/test_output - else - echo "FAIL ${ICE_TESTNAME}_$decomp compare ${ICE_BASECOM} ${btimeloop} ${bdynamics} ${bcolumn} different-data" >> ${ICE_CASEDIR}/test_output - echo "Regression baseline and test dataset are different" - endif - else - echo "MISS ${ICE_TESTNAME}_$decomp compare ${ICE_BASECOM} ${btimeloop} ${bdynamics} ${bcolumn} missing-data" >> ${ICE_CASEDIR}/test_output + echo "base_dir: ${baseline_dir}" + echo "test_dir: ${test_dir}" + ${ICE_CASEDIR}/casescripts/comparebfb.csh ${baseline_dir} ${test_dir} + set bfbstatus = $status + if ( ${bfbstatus} == 0 ) then + echo "PASS ${ICE_TESTNAME}_${decomp} compare ${ICE_BASECOM} ${btimeloop} ${bdynamics} ${bcolumn}" >> ${ICE_CASEDIR}/test_output + echo "Regression baseline and test dataset are identical" + else if ( ${bfbstatus} == 2 ) then + echo "MISS ${ICE_TESTNAME}_${decomp} compare ${ICE_BASECOM} ${btimeloop} ${bdynamics} ${bcolumn} missing-data" >> ${ICE_CASEDIR}/test_output echo "Missing data" + else + echo "FAIL ${ICE_TESTNAME}_${decomp} compare ${ICE_BASECOM} ${btimeloop} ${bdynamics} ${bcolumn} different-data" >> ${ICE_CASEDIR}/test_output + echo "Regression baseline and test dataset are different" endif endif endif diff --git a/configuration/scripts/tests/test_logbfb.script b/configuration/scripts/tests/test_logbfb.script new file mode 100644 index 000000000..fbce5d918 --- /dev/null +++ b/configuration/scripts/tests/test_logbfb.script @@ -0,0 +1,36 @@ + +#---------------------------------------------------- +# Run the CICE model +# This is identical to a smoke test, but triggers bfbcompare with log files instead of restarts +# cice.run returns -1 if run did not complete successfully + +mv -f ${ICE_CASEDIR}/test_output ${ICE_CASEDIR}/test_output.prev +cat ${ICE_CASEDIR}/test_output.prev | grep -iv "${ICE_TESTNAME} run" >! ${ICE_CASEDIR}/test_output +rm -f ${ICE_CASEDIR}/test_output.prev +echo "RUN ${ICE_TESTNAME} run " >> ${ICE_CASEDIR}/test_output + +./cice.run +set res="$status" + +set log_file = `ls -t1 ${ICE_RUNDIR}/cice.runlog* | head -1` +set ttimeloop = `grep TimeLoop ${log_file} | grep Timer | cut -c 22-32` +set tdynamics = `grep Dynamics ${log_file} | grep Timer | cut -c 22-32` +set tcolumn = `grep Column ${log_file} | grep Timer | cut -c 22-32` +if (${ttimeloop} == "") set ttimeloop = -1 +if (${tdynamics} == "") set tdynamics = -1 +if (${tcolumn} == "") set tcolumn = -1 + +mv -f ${ICE_CASEDIR}/test_output ${ICE_CASEDIR}/test_output.prev +cat ${ICE_CASEDIR}/test_output.prev | grep -iv "${ICE_TESTNAME} run" >! ${ICE_CASEDIR}/test_output +mv -f ${ICE_CASEDIR}/test_output ${ICE_CASEDIR}/test_output.prev +cat ${ICE_CASEDIR}/test_output.prev | grep -iv "${ICE_TESTNAME} test" >! ${ICE_CASEDIR}/test_output +rm -f ${ICE_CASEDIR}/test_output.prev + +set grade = FAIL +if ( $res == 0 ) then + set grade = PASS +endif + +echo "$grade ${ICE_TESTNAME} run ${ttimeloop} ${tdynamics} ${tcolumn}" >> ${ICE_CASEDIR}/test_output +echo "$grade ${ICE_TESTNAME} test " >> ${ICE_CASEDIR}/test_output + diff --git a/configuration/scripts/tests/test_restart.script b/configuration/scripts/tests/test_restart.script index 98bd10447..20953b1e1 100644 --- a/configuration/scripts/tests/test_restart.script +++ b/configuration/scripts/tests/test_restart.script @@ -16,7 +16,7 @@ echo "RUN ${ICE_TESTNAME} run " >> ${ICE_CASEDIR}/test_output echo "PEND ${ICE_TESTNAME} test " >> ${ICE_CASEDIR}/test_output ./cice.run -set res="$?" +set res="$status" if ( $res != 0 ) then mv -f ${ICE_CASEDIR}/test_output ${ICE_CASEDIR}/test_output.prev @@ -55,7 +55,7 @@ ${ICE_CASEDIR}/casescripts/parse_namelist.sh ice_in ${ICE_CASEDIR}/casescripts/t cp ice_in ice_in.2 ./cice.run -set res="$?" +set res="$status" cp ice_in.0 ice_in @@ -80,17 +80,12 @@ else echo "PASS ${ICE_TESTNAME} run ${ttimeloop} ${tdynamics} ${tcolumn}" >> ${ICE_CASEDIR}/test_output ${ICE_CASEDIR}/casescripts/comparebfb.csh ${ICE_RUNDIR}/restart - if ( $? == 0 ) then + set bfbstatus = $status + if (${bfbstatus} == 0) then echo "PASS ${ICE_TESTNAME} test " >> ${ICE_CASEDIR}/test_output else echo "FAIL ${ICE_TESTNAME} test " >> ${ICE_CASEDIR}/test_output endif - #${ICE_CASEDIR}/casescripts/comparebfb.csh ${ICE_RUNDIR}/ - #if ( $? == 0 ) then - # echo "PASS ${ICE_TESTNAME} log " >> ${ICE_CASEDIR}/test_output - #else - # echo "FAIL ${ICE_TESTNAME} log " >> ${ICE_CASEDIR}/test_output - #endif endif #----------------------------------------------------------- diff --git a/configuration/scripts/tests/test_smoke.script b/configuration/scripts/tests/test_smoke.script index 53d1747b0..42a963b47 100644 --- a/configuration/scripts/tests/test_smoke.script +++ b/configuration/scripts/tests/test_smoke.script @@ -9,7 +9,7 @@ rm -f ${ICE_CASEDIR}/test_output.prev echo "RUN ${ICE_TESTNAME} run " >> ${ICE_CASEDIR}/test_output ./cice.run -set res="$?" +set res="$status" set log_file = `ls -t1 ${ICE_RUNDIR}/cice.runlog* | head -1` set ttimeloop = `grep TimeLoop ${log_file} | grep Timer | cut -c 22-32` diff --git a/configuration/scripts/tests/timeseries.csh b/configuration/scripts/timeseries.csh similarity index 98% rename from configuration/scripts/tests/timeseries.csh rename to configuration/scripts/timeseries.csh index 0dd4f7bc2..cdd025efc 100755 --- a/configuration/scripts/tests/timeseries.csh +++ b/configuration/scripts/timeseries.csh @@ -76,7 +76,7 @@ foreach field ($fieldlist:q) set format = "%Y%m%d-%H" set output = `echo $fieldname | sed 's/ /_/g'` - set output = "${output}_${basename}.png" + set output = "${output}_${ICE_CASENAME}.png" echo "Plotting data for '$fieldname' and saving to $output" diff --git a/configuration/scripts/timeseries.py b/configuration/scripts/timeseries.py new file mode 100755 index 000000000..2b50c373a --- /dev/null +++ b/configuration/scripts/timeseries.py @@ -0,0 +1,297 @@ +#!/usr/bin/env python + +''' +This script generates timeseries plots of CICE diagnostic output. +It is generated to replicate the previous timeseries.csh script. + +Written by: Matthew Turner +Date: August, 2019 +''' + +import os +import sys +import logging +import numpy as np + +def find_logfile(log_dir): + ''' + This function searches for the most recently created log file in the provided directory. + ''' + + logger.debug('Getting a list of files in {}'.format(log_dir)) + try: + path = '{}/logs'.format(log_dir.rstrip('/')) + files = [os.path.join(path,f) for f in os.listdir('{}/logs'.format(log_dir)) \ + if f.startswith('cice.runlog')] + except: + path = log_dir + files = [os.path.join(path,f) for f in os.listdir(log_dir) if f.startswith('cice.runlog')] + + # Check if any files were found. If not, exit + if len(files) == 0: + logger.error('No cice.runlog* files found. Please make sure you are passing the \ + correct directory.') + sys.exit(1) + + # Get the most recently created file + outfile = max(files, key = os.path.getctime) + + logger.debug('List of files = {}'.format([f for f in files])) + logger.debug('Most recent file is {}'.format(outfile)) + + return outfile + +def get_data(logfile,field): + ''' + This function extracts data from a CICE log file for the specific field. + ''' + import datetime + import re + + logger.debug('Extracting data for {}'.format(field)) + + # Build the regular expression to extract the data + field_regex = field.replace('(','\(').replace('^','\^').replace(')','\)') + number_regex = '[-+]?\d+\.?\d+([eE][-+]?\d+)?' + my_regex = '{}\s+=\s+({})\s+({})'.format(field_regex,number_regex,number_regex) + + dtg = [] + arctic = [] + antarctic = [] + with open(logfile) as f: + for line in f.readlines(): + m1 = re.search('istep1:\s+(\d+)\s+idate:\s+(\d+)\s+sec:\s+(\d+)', line) + if m1: + # Extract the current date-time group from the file + date = m1.group(2) + seconds = int(m1.group(3)) + hours = seconds // 3600 + minutes = (seconds - hours*3600) // 60 + leftover = seconds - hours*3600 - minutes*60 + curr_date = '{}-{:02d}:{:02d}:{:02d}'.format(date,hours,minutes,leftover) + dtg.append(datetime.datetime.strptime(curr_date, '%Y%m%d-%H:%M:%S')) + logger.debug('Currently on timestep {}'.format(dtg[-1])) + + m = re.search(my_regex, line) + if m: + # Extract the data from the file + if 'E' in m.group(1) or 'e' in m.group(1): + expon = True + else: + expon = False + arctic.append(float(m.group(1))) + antarctic.append(float(m.group(3))) + logger.debug(' Arctic = {}, Antarctic = {}'.format(arctic[-1], antarctic[-1])) + + return dtg, arctic, antarctic, expon + +def latexit(string): + s = string[::-1].replace('(','($',1) + return (s.replace(')','$)',1))[::-1] + +def plot_timeseries(log, field, dtg, arctic, antarctic, expon, dtg_base=None, arctic_base=None, \ + antarctic_base=None, base_dir=None, grid=False): + ''' + Plot the timeseries data from the CICE log file + ''' + + casename = os.path.abspath(log).rstrip('/').rstrip('/logs').split('/')[-1] + if base_dir: + base_casename = os.path.abspath(base_dir).rstrip('/').rstrip('/logs').split('/')[-1] + + # Load the plotting libraries, but set the logging level for matplotlib + # to WARNING so that matplotlib debugging info is not printed when running + # with '-v' + logging.getLogger('matplotlib').setLevel(logging.WARNING) + import matplotlib.pyplot as plt + import matplotlib.dates as mdates + import matplotlib.ticker as ticker + + fig = plt.figure(figsize=(12,8)) + ax = fig.add_axes([0.05,0.08,0.9,0.9]) + + # Add the arctic data to the plot + ax.plot(dtg,arctic,label='Arctic') + # Add the baseline arctic data to the plot, if available + if arctic_base: + ax.plot(dtg_base,arctic_base,label='Baseline Arctic') + + # Add the antarctic data to the plot + ax.plot(dtg,antarctic,label='Antarctic') + # Add the baseline antarctic data to the plot, if available + if antarctic_base: + ax.plot(dtg_base,antarctic_base,label='Baseline Antarctic') + + ax.set_xlabel('') + ax.set_title('{} Diagnostic Output'.format(latexit(field))) + ax.set_ylabel(latexit(field)) + + # Format the x-axis labels + ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y/%m/%d')) + ax.xaxis.set_minor_locator(mdates.MonthLocator()) + + # Add a text box that prints the test case name and the baseline case name (if given) + try: + text_field = "Test/Case: {}\nBaseline: {}".format(casename,base_casename) + from matplotlib.offsetbox import AnchoredText + anchored_text = AnchoredText(text_field,loc=2) + ax.add_artist(anchored_text) + except: + text_field = "Test/Case: {}".format(casename) + from matplotlib.offsetbox import AnchoredText + anchored_text = AnchoredText(text_field,loc=2) + ax.add_artist(anchored_text) + + ax.legend(loc='upper right') + + # Add grid lines if the `--grid` argument was passed at the command line. + if grid: + ax.grid(ls='--') + + # Reduce the number of ticks on the y axis + nbins = 10 + try: + minval = min( \ + min(min(arctic), min(antarctic)), \ + min(min(arctic_base), min(antarctic_base))) + maxval = max( \ + max(max(arctic), max(antarctic)), \ + max(max(arctic_base), max(antarctic_base))) + except: + minval = min(min(arctic), min(antarctic)) + maxval = max(max(arctic), max(antarctic)) + step = (maxval-minval)/nbins + ax.yaxis.set_ticks(np.arange(minval, maxval+step, step)) + + # Format the y-axis tick labels, based on whether or not the values in the log file + # are in scientific notation or float notation. + if expon: + ax.yaxis.set_major_formatter(ticker.FormatStrFormatter('%0.3e')) + else: + ax.yaxis.set_major_formatter(ticker.FormatStrFormatter('%0.5f')) + + # Rotate and right align the x labels + for tick in ax.get_xticklabels(): + tick.set_rotation(45) + + # Create an output file and save the figure + field_tmp = field.split('(')[0].rstrip() + try: + outfile = '{}_{}_base-{}.png'.format(field_tmp.replace(' ','_'), casename,base_casename) + except: + outfile = '{}_{}.png'.format(field_tmp.replace(' ','_'), casename) + logger.info('Saving file to {}'.format(outfile)) + plt.savefig(outfile,dpi=300,bbox_inches='tight') + +def main(): + import argparse + parser = argparse.ArgumentParser(description="To generate timeseries plots, this script \ + can be passed a directory containing a logs/ subdirectory, \ + or it can be run in the directory with the log files, \ + without being passed a directory. It will pull the \ + diagnostic data from the most recently modified log file.\ + \ + If no flags are passed selecting the variables to plot, \ + then plots will be created for all available variables.") + parser.add_argument('log_dir', nargs='?', default=os.getcwd(), \ + help="Path to diagnostic output log file. A specific log file can \ + be passed, or a case directory. If a directory is passed, \ + the most recent log file will be used. If no directory or \ + file is passed, the script will look for a log file in the \ + current directory.") + parser.add_argument('--bdir',dest='base_dir', help='Path to the the log file for a baseline \ + dataset, if desired. A specific log file or case directory can \ + be passed. If a directory is passed, the most recent log file \ + will be used.') + parser.add_argument('-v', '--verbose', dest='verbose', help='Print debug output?', \ + action='store_true') + parser.add_argument('--area', dest='area', help='Create a plot for total ice area?', \ + action='store_true') + parser.add_argument('--extent', dest='extent', help='Create a plot for total ice extent?', \ + action='store_true') + parser.add_argument('--volume', dest='ice_volume', help='Create a plot for total ice volume?', \ + action='store_true') + parser.add_argument('--snw_vol', dest='snow_volume', help='Create a plot for total snow \ + volume?', action='store_true') + parser.add_argument('--speed', dest='speed', help='Create a plot for rms ice speed?', \ + action='store_true') + parser.add_argument('--grid',dest='grid', help='Add grid lines to the figures?', \ + action='store_true') + + # Set the defaults for the command line options + parser.set_defaults(verbose=False) + parser.set_defaults(area=False) + parser.set_defaults(extent=False) + parser.set_defaults(ice_volume=False) + parser.set_defaults(snow_volume=False) + parser.set_defaults(speed=False) + parser.set_defaults(grid=False) + + args = parser.parse_args() + + # If no fields are passed, plot all fields + if not ( args.area or args.extent or args.ice_volume or args.snow_volume or args.speed ): + args.area = True + args.extent = True + args.ice_volume = True + args.snow_volume = True + args.speed = True + + # Build the fieldlist based on which fields are passed + fieldlist = [] + if args.area: + fieldlist.append('total ice area (km^2)') + if args.extent: + fieldlist.append('total ice extent(km^2)') + if args.ice_volume: + fieldlist.append('total ice volume (m^3)') + if args.snow_volume: + fieldlist.append('total snw volume (m^3)') + if args.speed: + fieldlist.append('rms ice speed (m/s)') + + # Setup the logger + global logger + if args.verbose: + logging.basicConfig(level=logging.DEBUG) + else: + logging.basicConfig(level=logging.INFO) + logger = logging.getLogger(__name__) + + # Find the test and baseline log files, based on the input directories. + if os.path.isdir(args.log_dir): + logger.debug('{} is a directory'.format(args.log_dir)) + log = find_logfile(args.log_dir) + log_dir = args.log_dir + else: + logger.debug('{} is a file'.format(args.log_dir)) + log = args.log_dir + log_dir = args.log_dir.rsplit('/',1)[0] + logger.info('Log file = {}'.format(log)) + if args.base_dir: + if os.path.isdir(args.base_dir): + base_log = find_logfile(args.base_dir) + base_dir = args.base_dir + else: + base_log = args.base_dir + base_dir = args.base_dir.rsplit('/',1)[0] + logger.info('Base Log file = {}'.format(base_log)) + + # Loop through each field and create the plot + for field in fieldlist: + logger.debug('Current field = {}'.format(field)) + + # Get the data from the log files + dtg, arctic, antarctic, expon = get_data(log, field) + if args.base_dir: + dtg_base, arctic_base, antarctic_base, expon_base = get_data(base_log,field) + + # Plot the data + if args.base_dir: + plot_timeseries(log_dir, field, dtg, arctic, antarctic, expon, dtg_base, \ + arctic_base, antarctic_base, base_dir, grid=args.grid) + else: + plot_timeseries(log_dir, field, dtg, arctic, antarctic, expon, grid=args.grid) + +if __name__ == "__main__": + main() diff --git a/doc/source/cice_index.rst b/doc/source/cice_index.rst index ef2c7d95d..e48e523ca 100644 --- a/doc/source/cice_index.rst +++ b/doc/source/cice_index.rst @@ -162,7 +162,7 @@ either Celsius or Kelvin units). "dte2T", "dte / 2(damping time scale)", "" "dtei", "1/dte, where dte is the EVP subcycling time step", "1/s" "dump_file", ":math:`\bullet` output file for restart dump", "" - "dumpfreq", ":math:`\bullet` dump frequency for restarts, y, m or d", "" + "dumpfreq", ":math:`\bullet` dump frequency for restarts, y, m, d, h or 1", "" "dumpfreq_n", ":math:`\bullet` restart output frequency", "" "dump_last", ":math:`\bullet` if true, write restart on last time step of simulation", "" "dxhy", "combination of HTE values", "" diff --git a/doc/source/conf.py b/doc/source/conf.py index 2599d0151..f9b0b7b68 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -54,7 +54,7 @@ # General information about the project. project = u'CICE' -copyright = u'2018, Triad National Security, LLC (code) and National Center for Atmospheric Research (documentation)' +copyright = u'2019, Triad National Security, LLC (code) and National Center for Atmospheric Research (documentation)' author = u'CICE-Consortium' # The version info for the project you're documenting, acts as replacement for @@ -62,9 +62,9 @@ # built documents. # # The short X.Y version. -version = u'6.0.0' +version = u'6.0.1' # The full version, including alpha/beta/rc tags. -version = u'6.0.0' +version = u'6.0.1' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/doc/source/developer_guide/dg_about.rst b/doc/source/developer_guide/dg_about.rst index 0fae7db05..bd06d7a3f 100755 --- a/doc/source/developer_guide/dg_about.rst +++ b/doc/source/developer_guide/dg_about.rst @@ -16,3 +16,14 @@ Guiding principles for the creation of CICE include the following: on individual gridcells, and contain no underlying infrastructure. CICE must call into Icepack using interfaces and approaches specified by Icepack. + +Git workflow and Pull Requests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +There is extensive Information for Developers documentation available. +See https://github.com/CICE-Consortium/About-Us/wiki/Resource-Index#information-for-developers +for information on: + - Contributing to model development + - Software development practices guide + - git Workflow Guide - including extensive information about the Pull Request process and requirements + - Documentation Workflow Guide diff --git a/doc/source/developer_guide/dg_dynamics.rst b/doc/source/developer_guide/dg_dynamics.rst index 83933e591..21790f199 100755 --- a/doc/source/developer_guide/dg_dynamics.rst +++ b/doc/source/developer_guide/dg_dynamics.rst @@ -37,8 +37,18 @@ The dynamics solvers are found in **cicecore/cicedynB/dynamics/**. A couple of available including EVP, revised EVP, and EAP. The dynamics solver is specified in namelist with the ``kdyn`` variable. ``kdyn=1`` is evp, ``kdyn=2`` is eap, and revised evp requires the ``revised_evp`` namelist flag be set to true. -A vectorized version of EVP is available through the namelist flag ``evp_kernel_ver``. Default is "normal" -EVP as usual ``evp_kernel_ver=0``, whereas an vectorized version (ver.2) is available ``evp_kernel_ver=2``. + +Multiple evp solvers are supported thru the namelist flag ``kevp_kernel``. The standard implementation +and current default is ``kevp_kernel=0``. In this case, the stress is solved on the regular decomposition +via subcycling and calls to subroutine stress and subroutine stepu with MPI global sums required in each +subcycling call. With ``kevp_kernel=2``, the data required to compute the stress is gathered to the root +MPI process and the stress calculation is performed on the root task without any MPI global sums. OpenMP +parallelism is supported in ``kevp_kernel=2``. The solutions with ``kevp_kernel`` set to 0 or 2 will +not be bit-for-bit +identical but should be the same to roundoff and produce the same climate. ``kevp_kernel=2`` may perform +better for some configurations, some machines, and some pe counts. ``kevp_kernel=2`` is not supported +with the tripole grid and is still being validated. Until ``kevp_kernel=2`` is fully validated, it will +abort if set. To override the abort, use value 102 for testing. Transport diff --git a/doc/source/developer_guide/dg_other.rst b/doc/source/developer_guide/dg_other.rst index 0016dc17e..2a46186bd 100644 --- a/doc/source/developer_guide/dg_other.rst +++ b/doc/source/developer_guide/dg_other.rst @@ -9,21 +9,55 @@ Other things Reproducible Sums ---------------------- -The ‘reproducible’ option (`DITTO`) makes diagnostics bit-for-bit when -varying the number of processors. (The simulation results are -bit-for-bit regardless, because they do not require global sums or -max/mins as do the diagnostics.) This was done mainly by increasing the -precision for the global reduction calculations, except for regular -double-precision (r8) calculations involving MPI; MPI can not handle -MPI\_REAL16 on some architectures. Instead, these cases perform sums or -max/min calculations across the global block structure, so that the -results are bit-for-bit as long as the block distribution is the same -(the number of processors can be different). - -A more flexible option is available for double-precision MPI -calculations, using the namelist variable `bfbflag`. When true, this flag -produces bit-for-bit identical diagnostics with different tasks, -threads, blocks and grid decompositions. +Reproducible sums in the CICE diagnostics are set with the namelist `bfbflag`. +CICE prognostics results do NOT depend on the global sum implementation. The +results are bit-for-bit identical with any `bfbflag`. The `bfbflag` only impacts +the results and performance of the global diagnostics written to the CICE +log file. For best performance, the off (or lsum8 which is equivalent) setting is recommended. +This will probably not produce bit-for-bit results with different decompositions. +For bit-for-bit results, the reprosum setting is recommended. This should be +only slightly slower than the lsum8 implementation. + +Global sums of real types are not reproducible due to different order of operations of the +sums of the individual data which introduced roundoff errors. +This is caused when the model data is laid out in different +block decompositions or on different pe counts so the data is stored in memory +in different orders. Integer data should be bit-for-bit identical regardless of +the order of operation of the sums. + +The `bfbflag` namelist is a character string with several valid settings. +The tradeoff in these settings is the likelihood for bit-for-bit results versus +their cost. The `bfbflag` settings are implemented as follows, + +off is the default and equivalent to lsum8. + +lsum4 is a local sum computed with single precision (4 byte) data and a scalar mpi allreduce. +This is extremely unlikely to be bit-for-bit for different decompositions. +This should generally not be used as the accuracy is very poor for a model +implemented with double precision (8 byte) variables. + +lsum8 is a local sum computed with double precision data and a scalar mpi allreduce. +This is extremely unlikely to be bit-for-bit for different decompositions +but is fast. For CICE implemented in double precision, the differences in global sums +for different decompositions should be at the roundoff level. + +lsum16 is a local sum computed with quadruple precision (16 byte) data and a scalar mpi allreduce. +This is very likely to be bit-for-bit for different decompositions. However, +it should be noted that this implementation is not available or does not work +properly with some compiler and some MPI implementation. Support for quad precision +and consistency between underlying fortran and c datatypes can result in inability to +compile or incorrect results. The source code associated with this implementation +can be turned off with the cpp, NO_R16. Otherwise, it is recommended that this +option NOT be used or that results be carefully validated on any platform before +it is used. + +reprosum is a fixed point method based on ordered double integer sums +that requires two scalar reductions per global sum. This is extremely likely to be bfb, +but will be slightly more expensive than the lsum algorithms. See :cite:`Mirin12` + +ddpdd is a parallel double-double algorithm using single scalar reduction. +This is very likely to be bfb, but is not as fast or accurate as the reprosum +implementation. See :cite:`He01` .. _addtimer: @@ -126,57 +160,57 @@ Additional information about tracers can be found in the To add a tracer, follow these steps using one of the existing tracers as a pattern. -#. **icepack\_tracers.F90** and **icepack\_[tracer].F90**: declare tracers, -add flags and indices, and create physics routines as described in the -`Icepack documentation `_ + 1) **icepack\_tracers.F90** and **icepack\_[tracer].F90**: declare tracers, + add flags and indices, and create physics routines as described in the + `Icepack documentation `_ -#. **ice_arrays_column.F90**: declare arrays + 2) **ice_arrays_column.F90**: declare arrays -#. **ice_init_column.F90**: initialize arrays + 3) **ice_init_column.F90**: initialize arrays -#. **ice\_init.F90**: (some of this may be done in **icepack\_[tracer].F90** - instead) + 4) **ice\_init.F90**: (some of this may be done in **icepack\_[tracer].F90** + instead) - - declare ``tr_[tracer]`` and ``nt_[tracer]`` as needed + - declare ``tr_[tracer]`` and ``nt_[tracer]`` as needed - - add logical namelist variables ``tr_[tracer]``, ``restart_[tracer]`` + - add logical namelist variables ``tr_[tracer]``, ``restart_[tracer]`` - - initialize and broadcast namelist variables + - initialize and broadcast namelist variables - - check for potential conflicts, aborting if any occur + - check for potential conflicts, aborting if any occur - - print namelist variables to diagnostic output file + - print namelist variables to diagnostic output file - - initialize tracer flags etc in icepack (call *icepack_init_tracer_flags* etc) + - initialize tracer flags etc in icepack (call *icepack_init_tracer_flags* etc) - - increment number of tracers in use based on namelist input (``ntrcr``) + - increment number of tracers in use based on namelist input (``ntrcr``) - - define tracer dependencies + - define tracer dependencies -#. **CICE\_InitMod.F90**: initialize tracer (includes reading restart file) + 5) **CICE\_InitMod.F90**: initialize tracer (includes reading restart file) -#. **CICE\_RunMod.F90**, **ice\_step\_mod.F90** (and elsewhere as needed): + 6) **CICE\_RunMod.F90**, **ice\_step\_mod.F90** (and elsewhere as needed): - - call routine to write tracer restart data + - call routine to write tracer restart data - - call Icepack or other routines to update tracer value - (often called from **ice\_step\_mod.F90**) + - call Icepack or other routines to update tracer value + (often called from **ice\_step\_mod.F90**) -#. **ice\_restart.F90**: define restart variables (for binary, netCDF and PIO) + 7) **ice\_restart.F90**: define restart variables (for binary, netCDF and PIO) -#. **ice\_restart\_column.F90**: create routines to read, write tracer restart data + 8) **ice\_restart\_column.F90**: create routines to read, write tracer restart data -#. **ice\_fileunits.F90**: add new dump and restart file units + 9) **ice\_fileunits.F90**: add new dump and restart file units -#. **ice\_history\_[tracer].F90**: add history variables - (Section :ref:`addhist`) + 10) **ice\_history\_[tracer].F90**: add history variables + (Section :ref:`addhist`) -#. **ice\_in**: add namelist variables to *tracer\_nml* and - *icefields\_nml*. Best practice is to set the namelist values so that the - new capability is turned off, and create an option file with your preferred - configuration in **configuration/scripts/options**. + 11) **ice\_in**: add namelist variables to *tracer\_nml* and + *icefields\_nml*. Best practice is to set the namelist values so that the + new capability is turned off, and create an option file with your preferred + configuration in **configuration/scripts/options**. -#. If strict conservation is necessary, add diagnostics as noted for - topo ponds in the `Icepack documentation `_. + 12) If strict conservation is necessary, add diagnostics as noted for + topo ponds in the `Icepack documentation `_. -#. Update documentation, including **cice_index.rst** and **ug_case_settings.rst** + 13) Update documentation, including **cice_index.rst** and **ug_case_settings.rst** diff --git a/doc/source/developer_guide/dg_scripts.rst b/doc/source/developer_guide/dg_scripts.rst index 185616eab..e84859c4e 100755 --- a/doc/source/developer_guide/dg_scripts.rst +++ b/doc/source/developer_guide/dg_scripts.rst @@ -31,6 +31,8 @@ The directory structure under configure/scripts is as follows. | **parse_settings.sh** replaces settings with command-line configuration | **setup_run_dirs.csh** creates the case run directories | **set_version_number.csh** updates the model version number from the **cice.setup** command line +| **timeseries.csh** generates PNG timeseries plots from output files, using GNUPLOT +| **timeseries.py** generates PNG timeseries plots from output files, using Python | **tests/** scripts for configuring and running basic tests .. _dev_strategy: @@ -78,6 +80,18 @@ with appropriate names and syntax. The set_nml file syntax is the same as namel syntax and the set_env files are consistent with csh setenv syntax. See other files for examples of the syntax. +.. _build: + +Build Scripts +----------- + +CICE uses GNU Make to build the model. There is a common **Makefile** for all machines. +Each machine provides a Macros file to define some Makefile variables +and and an env file to specify the modules/software stack for each compiler. +The machine is built by the cice.build script which invokes Make. +There is a special trap for circular dependencies in the cice.build script to +highlight this error when it occurs. + .. _dev_machines: Machines @@ -122,9 +136,8 @@ setup the various tests, such as smoke and restart tests (**test_smoke.script**, and the files that describe with options files are needed for each test (ie. **test_smoke.files**, **test_restart.files**). A baseline test script (**baseline.script**) is also there to setup the general regression and comparison testing. That directory also contains the preset test suites -(ie. **base_suite.ts**) and a file that supports post-processing on the model -output (**timeseries.csh**). There is also a script **report_results.csh** that pushes results -from test suites back to the CICE-Consortium test results wiki page. +(ie. **base_suite.ts**) and a script (**report_results.csh**) that pushes results from +test suites back to the CICE-Consortium test results wiki page. To add a new test (for example newtest), several files may be needed, diff --git a/doc/source/intro/copyright.rst b/doc/source/intro/copyright.rst index 1fa535a70..57b190f44 100755 --- a/doc/source/intro/copyright.rst +++ b/doc/source/intro/copyright.rst @@ -5,7 +5,7 @@ Copyright ============================= -© Copyright 2018, Triad National Security LLC. All rights reserved. +© Copyright 2019, Triad National Security LLC. All rights reserved. This software was produced under U.S. Government contract 89233218CNA000001 for Los Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. Department diff --git a/doc/source/intro/quickstart.rst b/doc/source/intro/quickstart.rst index deded30a5..547d6ef20 100755 --- a/doc/source/intro/quickstart.rst +++ b/doc/source/intro/quickstart.rst @@ -14,6 +14,8 @@ found in the `CICE Git and Workflow Guide `_ or :ref:`force`. +Software requirements are noted in this :ref:`software` section. + From your main CICE directory, execute:: ./cice.setup -c ~/mycase1 -g gx3 -m testmachine -s diag1,thread -p 8x1 diff --git a/doc/source/master_list.bib b/doc/source/master_list.bib index f4235b9fd..b80b36d41 100644 --- a/doc/source/master_list.bib +++ b/doc/source/master_list.bib @@ -34,6 +34,7 @@ @string{JGRO @string{JGRB = {J. Geophys. Res. Biogeo.}} @string{JGRA = {J. Geophys. Res. Atmos.}} @string{JCT = {J. Comput. Phys.}} +@string{JOS = {The Journal of Supercomputing}} @string{QJRMS = {Quart. J. Royal Met. Soc.}} @string{GRL = {Geophys. Res. Lett.}} @string{JAS = {J. Atmos. Sci.}} @@ -448,6 +449,16 @@ @Article{Lipscomb01 pages = {13989-14000}, url = {http://dx.doi.org/10.1029/2000JC000518} } +@Article{He01 + author = "Y. He and C.H.Q. Ding", + title = "{Using Accurate Arithmetics to Improve Numerical Reproducibility and Stability in Parallel Applications}", + journal = JOS, + year = {2001}, + volume = {18}, + issue = {3}, + pages = {259-277}, + url = {http://dx.doi.org/10.1023/A:1008153532043} +} @Article{Schulson01 author = "E.M. Schulson", title = "{Brittle failure of ice}", @@ -792,6 +803,16 @@ @Article{Lupkes12 number = {D13}, url = {http://dx.doi.org/10.1029/2012JD017630} } +@Article{Mirin12 + author = "A.A. Mirin and P.H. Worley", + title = "{Improving the Performance Scalability of the Community Atmosphere Model}", + journal = IJHPCA, + year = {2012}, + volume = {26}, + number = {1}, + pages = {17-30}, + url = {http://dx.doi.org/10.1177/1094342011412630} +} @Article{Bouillon13 author = "S. Bouillon and T. Fichefet and V. Legat and G. Madec", title = "{The elastic-viscous-plastic method revisited}", @@ -831,7 +852,7 @@ @Article{Turner13 @Article{Craig14, author = "A. Craig and S. Mickelson and E.C. Hunke and D. Bailey", title = "{Improved parallel performance of the CICE model in CESM1}", - journal = {IIJHPCA}, + journal = IJHPCA, year = {2014}, volume = {29}, number = {2}, diff --git a/doc/source/science_guide/sg_dynamics.rst b/doc/source/science_guide/sg_dynamics.rst index f5db205c6..8117fd58d 100644 --- a/doc/source/science_guide/sg_dynamics.rst +++ b/doc/source/science_guide/sg_dynamics.rst @@ -190,7 +190,11 @@ above hydrostatic balance and the value of :math:`k_2`. It is, however, the para The value of :math:`k_1` can be changed at runtime using the namelist variable ``k1``. The grounding scheme can be turned on or off using the namelist logical basalstress. Note that the user must provide a bathymetry field for using this grounding -scheme. Grounding occurs up to water depth of ~25 m. It is suggested to have a bathymetry field with water depths larger than 5 m that represents well shallow water regions such as the Laptev Sea and the East Siberian Sea. +scheme. It is suggested to have a bathymetry field with water depths larger than +5 m that represents well shallow water regions such as the Laptev Sea and the +East Siberian Sea. To prevent unrealistic grounding, :math:`T_b` is set to zero when :math:`h_{wu}` +is larger than 30 m. This maximum value is chosen based on observations of large +keels in the Arctic Ocean :cite:`Amundrud04`. .. _internal-stress: @@ -553,7 +557,7 @@ Introducing another numerical parameter :math:`\alpha=2T \Delta t_e ^{-1}` :cite where as opposed to the classic EVP, the second term in each equation is at iteration :math:`k` :cite:`Bouillon13`. Also, as opposed to the classic EVP, :math:`\Delta t_e` times the number of subcycles (or iterations) does not need to be equal to the advective time step :math:`\Delta t`. -A last difference between the classic EVP and the revised approach is that the latter one initializes the stresses to 0 at the beginning of each time step, -while the classic EVP approach uses the previous time level value. The revised EVP is activated by setting the namelist parameter `revised\_evp` = true. +Finally, as with the classic EVP approach, the stresses are initialized using the previous time level values. +The revised EVP is activated by setting the namelist parameter `revised\_evp` = true. In the code :math:`\alpha = arlx` and :math:`\beta = brlx`. The values of :math:`arlx` and :math:`brlx` can be set in the namelist. It is recommended to use large values of these parameters and to set :math:`arlx=brlx` :cite:`Kimmritz15`. diff --git a/doc/source/user_guide/ug_case_settings.rst b/doc/source/user_guide/ug_case_settings.rst index 1864593cf..8986bc9d8 100755 --- a/doc/source/user_guide/ug_case_settings.rst +++ b/doc/source/user_guide/ug_case_settings.rst @@ -66,7 +66,6 @@ to support the CICE model. "ICE_ACCOUNT", "string", "batch account number", "set by cice.setup, .cice_proj or by default" "ICE_QUEUE", "string", "batch queue name", "set by cice.setup or by default" "ICE_THREADED", "true, false", "force threading in compile, will always compile threaded if ICE_NTHRDS :math:`> 1`", "false" - "DITTO", "no, yes", "turn on bit-for-bit global sums via real16", "no" "ICE_BLDDEBUG", "true, false", "turn on compile debug flags", "false" @@ -112,10 +111,11 @@ Table of namelist options "","", "``m``", "write restart every ``dumpfreq_n`` months", "" "","", "``d``", "write restart every ``dumpfreq_n`` days", "" "","", "``h``", "write restart every ``dumpfreq_n`` hours", "" + "","", "``1``", "write restart every ``dumpfreq_n`` time step", "" "","``dumpfreq_n``", "integer", "frequency restart data is written", "1" "\*","``dump_last``", "true/false", "if true, write restart on last time step of simulation", "" "","", "", "**Model Output**", "" - "","``bfbflag``", "true/false", "for bit-for-bit diagnostic output", "" + "","``bfbflag``", "off/lsum4/lsum8/lsum16/ddpdd/reprosum", "global sum methods", "off" "\*","``diagfreq``", "integer", "frequency of diagnostic output in ``dt``", "24" "","", "*e.g.*, 10", "once every 10 time steps", "" "\*","``diag_type``", "``stdout``", "write diagnostic output to stdout", "" @@ -131,7 +131,7 @@ Table of namelist options "","", "``m``", "write history every ``histfreq_n`` months", "" "","", "``d``", "write history every ``histfreq_n`` days", "" "","", "``h``", "write history every ``histfreq_n`` hours", "" - "","", "``1``", "write history every time step", "" + "","", "``1``", "write history every ``histfreq_n`` time step", "" "","", "``x``", "unused frequency stream (not written)", "" "","``histfreq_n``", "integer array", "frequency history output is written", "" "","", "0", "do not write to history", "" diff --git a/doc/source/user_guide/ug_running.rst b/doc/source/user_guide/ug_running.rst index cd96e9d69..5be0a8683 100644 --- a/doc/source/user_guide/ug_running.rst +++ b/doc/source/user_guide/ug_running.rst @@ -7,6 +7,71 @@ Running CICE Quick-start instructions are provided in the :ref:`quickstart` section. +.. _software: + +Software Requirements +------- + +To run stand-alone, CICE requires + +- gmake (GNU Make) +- Fortran and C compilers (Intel, PGI, GNU, Cray, and NAG have been tested) +- NetCDF +- MPI (this is actually optional but without it you can only run on 1 processor) + +Below are lists of software versions that the Consortium has tested at some point. There is no +guarantee that all compiler versions work with all CICE model versions. At any given +point, the Consortium is regularly testing on several different compilers, but not +necessarily on all possible versions or combinations. A CICE goal is to be relatively portable +across different hardware, compilers, and other software. As a result, the coding +implementation tends to be on the conservative side at times. If there are problems +porting to a particular system, please let the Consortium know. + +The Consortium has tested the following compilers at some point, + +- Intel 15.0.3.187 +- Intel 16.0.1.150 +- Intel 17.0.1.132 +- Intel 17.0.2.174 +- Intel 17.0.5.239 +- Intel 18.0.1.163 +- Intel 19.0.2 +- Intel 19.0.3.199 +- PGI 16.10.0 +- GNU 6.3.0 +- GNU 7.2.0 +- GNU 7.3.0 +- Cray 8.5.8 +- Cray 8.6.4 +- NAG 6.2 + +The Consortium has tested the following mpi versions, + +- MPICH 7.3.2 +- MPICH 7.5.3 +- MPICH 7.6.2 +- MPICH 7.6.3 +- MPICH 7.7.6 +- Intel MPI 18.0.1 +- MPT 2.14 +- MPT 2.17 +- MPT 2.18 +- MPT 2.19 +- OpenMPI 1.6.5 + +The NetCDF implementation is relatively general and should work with any version of NetCDF 3 or 4. The Consortium has tested + +- NetCDF 4.3.0 +- NetCDF 4.3.2 +- NetCDF 4.4.0 +- NetCDF 4.4.1.1.32 +- NetCDF 4.4.1.1 +- NetCDF 4.4.2 +- NetCDF 4.5.0 +- NetCDF 4.6.1.3 + +Please email the Consortium if this list can be extended. + .. _scripts: Scripts @@ -16,6 +81,8 @@ The CICE scripts are written to allow quick setup of cases and tests. Once a ca generated, users can manually modify the namelist and other files to custom configure the case. Several settings are available via scripts as well. +.. _overview: + Overview ~~~~~~~~ @@ -36,7 +103,7 @@ There are three usage modes, All modes will require use of ``--mach`` or ``-m`` to specify the machine and case and test modes can use ``--set`` or ``-s`` to define specific options. ``--test`` and ``--suite`` will require ``--testid`` to be set -and both of the test modes can use ``--bdir``, ``--bgen``, ``--bcmp``, and ``--diff`` to generate (save) results and compare results with prior results. +and both of the test modes can use ``--bdir``, ``--bgen``, ``--bcmp``, and ``--diff`` to generate (save) results and compare results with prior results as well as ``--tdir`` to specify the location of the test directory. Testing will be described in greater detail in the :ref:`testing` section. Again, ``cice.setup --help`` will show the latest usage information including @@ -78,9 +145,9 @@ Some hints: - To change the block sizes required at build time, edit the **cice.settings** file. - To change namelist, manually edit the **ice_in** file - To change batch settings, manually edit the top of the **cice.run** or **cice.test** (if running a test) file -- To turn on the debug compiler flags, set ``ICE_BLDDEBUG`` in **cice.setttings** to true +- To turn on the debug compiler flags, set ``ICE_BLDDEBUG`` in **cice.setttings** to true. It is also possible to use the ``debug`` option (``-s debug``) when creating the case with **cice.setup** to set this option automatically. - To change compiler options, manually edit the Macros file -- To clean the build before each compile, set ``ICE_CLEANBUILD`` in **cice.settings** to true. To not clean before the build, set ``ICE_CLEANBUILD`` in **cice.settings** to false +- To clean the build before each compile, set ``ICE_CLEANBUILD`` in **cice.settings** to true (this is the default value), or use the ``buildclean`` option (``-s buildclean``) when creating the case with **cice.setup**. To not clean before the build, set ``ICE_CLEANBUILD`` in **cice.settings** to false, or use the ``buildincremental`` option (``-s buildincremental``) when creating the case with **cice.setup**. It is recommended that the ``ICE_CLEANBUILD`` be set to true if there are any questions about whether the build is proceeding properly. To build and run:: @@ -151,8 +218,8 @@ by doing ``cice.setup --help``. The default CICE namelist and CICE settings are specified in the files **configuration/scripts/ice_in** and -**configuration/scripts/cice.settings** respectively. When picking a -preset setting (option), the set_env.setting and set_nml.setting will be used to +**configuration/scripts/cice.settings** respectively. When picking +settings (options), the set_env.setting and set_nml.setting will be used to change the defaults. This is done as part of the ``cice.setup`` and the modifications are resolved in the **cice.settings** and **ice_in** file placed in the case directory. If multiple options are chosen and then conflict, then the last @@ -162,6 +229,10 @@ Some of the options are ``debug`` which turns on the compiler debug flags +``buildclean`` which turns on the option to clean the build before each compile + +``buildincremental`` which turns off the option to clean the build before each compile + ``short``, ``medium``, ``long`` which change the batch time limit ``gx3``, ``gx1``, ``tx1`` are associate with grid specific settings @@ -201,7 +272,7 @@ Once the cases are created, users are free to modify the cice.settings and ice_i Porting ------- -To port, an **env.[machine]_[environment]** and **Macros.[machine]_[environment}** file have to be added to the +To port, an **env.[machine]_[environment]** and **Macros.[machine]_[environment]** file have to be added to the **configuration/scripts/machines/** directory and the **configuration/scripts/cice.batch.csh** file needs to be modified. In general, the machine is specified in ``cice.setup`` with ``--mach`` @@ -229,6 +300,45 @@ directory back to **configuration/scripts/machines/** and update the **configuration/scripts/cice.batch.csh** file, retest, and then add and commit the updated machine files to the repository. +.. _machvars: + +Machine variables +~~~~~~~~~~~~~~~~~~~~~~ + +There are several machine specific variables defined in the **env.$[machine]**. These +variables are used to generate working cases for a given machine, compiler, and batch +system. Some variables are optional. + +.. csv-table:: *Machine Settings* + :header: "variable", "format", "description" + :widths: 15, 15, 25 + + "ICE_MACHINE_ENVNAME", "string", "machine name" + "ICE_MACHINE_COMPILER", "string", "compiler" + "ICE_MACHINE_MAKE", "string", "make command" + "ICE_MACHINE_WKDIR", "string", "root work directory" + "ICE_MACHINE_INPUTDATA", "string", "root input data directory" + "ICE_MACHINE_BASELINE", "string", "root regression baseline directory" + "ICE_MACHINE_SUBMIT", "string", "batch job submission command" + "ICE_MACHINE_TPNODE", "integer", "machine maximum MPI tasks per node" + "ICE_MACHINE_MAXPES", "integer", "machine maximum total processors per job (optional)" + "ICE_MACHINE_MAXRUNLENGTH", "integer", "batch wall time limit in hours (optional)" + "ICE_MACHINE_ACCT", "string", "batch default account" + "ICE_MACHINE_QUEUE", "string", "batch default queue" + "ICE_MACHINE_BLDTHRDS", "integer", "number of threads used during build" + "ICE_MACHINE_QSTAT", "string", "batch job status command (optional)" + "ICE_MACHINE_QUIETMODE", "true/false", "flag to reduce build output (optional)" + +.. _cross_compiling: + +Cross-compiling +~~~~~~~~~~~~~~~ +It can happen that the model must be built on a platform and run on another, for example when the run environment is only available in a batch queue. The program **makdep** (see :ref:`overview`), however, is both compiled and run as part of the build process. + +In order to support this, the Makefile uses a variable ``CFLAGS_HOST`` that can hold compiler flags specfic to the build machine for the compilation of makdep. If this feature is needed, add the variable ``CFLAGS_HOST`` to the **Macros.[machine]_[environment]** file. For example : :: + + CFLAGS_HOST = -xHost + .. _account: Machine Account Settings @@ -289,3 +399,94 @@ should be rebuilt before being resubmitted. It is always recommended that users modify the scripts and input settings in the case directory, NOT the run directory. In general, files in the run directory are overwritten by versions in the case directory when the model is built, submitted, and run. + +.. _timeseries: + +Timeseries Plotting +------------------- + +The CICE scripts include two scripts that will generate timeseries figures from a +diagnostic output file, a Python version (``timeseries.py``) and a csh version +(``timeseries.csh``). Both scripts create the same set of plots, but the Python +script has more capabilities, and it's likely that the csh +script will be removed in the future. + +To use the ``timeseries.py`` script, the following requirements must be met: + +* Python v2.7 or later +* numpy Python package +* matplotlib Python package +* datetime Python package + +See :ref:`CodeCompliance` for additional information about how to setup the Python +environment, but we recommend using ``pip`` as follows: :: + + pip install --user numpy + pip install --user matplotlib + pip install --user datetime + +When creating a case or test via ``cice.setup``, the ``timeseries.csh`` and +``timeseries.py`` scripts are automatically copied to the case directory. +Alternatively, the plotting scripts can be found in ``./configuration/scripts``, and can be +run from any directory. + +The Python script can be passed a directory, a specific log file, or no directory at all: + + - If a directory is passed, the script will look either in that directory or in + directory/logs for a filename like cice.run*. As such, users can point the script + to either a case directory or the ``logs`` directory directly. The script will use + the file with the most recent creation time. + - If a specific file is passed the script parses that file, assuming that the file + matches the same form of cice.run* files. + - If nothing is passed, the script will look for log files or a ``logs`` directory in the + directory from where the script was run. + +For example: + +Run the timeseries script on the desired case. :: + +$ python timeseries.py /p/work1/turner/CICE_RUNS/conrad_intel_smoke_col_1x1_diag1_run1year.t00/ + +or :: + +$ python timeseries.py /p/work1/turner/CICE_RUNS/conrad_intel_smoke_col_1x1_diag1_run1year.t00/logs + +The output figures are placed in the directory where the ``timeseries.py`` script is run. + +The plotting script will plot the following variables by default, but you can also select +specific plots to create via the optional command line arguments. + + - total ice area (:math:`km^2`) + - total ice extent (:math:`km^2`) + - total ice volume (:math:`m^3`) + - total snow volume (:math:`m^3`) + - RMS ice speed (:math:`m/s`) + +For example, to plot only total ice volume and total snow volume :: + +$ python timeseries.py /p/work1/turner/CICE_RUNS/conrad_intel_smoke_col_1x1_diag1_run1year.t00/ --volume --snw_vol + +To generate plots for all of the cases within a suite with a testid, create and run a script such as :: + + #!/bin/csh + foreach dir (`ls -1 | grep testid`) + echo $dir + python timeseries.py $dir + end + +Plots are only made for a single output file at a time. The ability to plot output from +a series of cice.run* files is not currently possible, but may be added in the future. +However, using the ``--bdir`` option will plot two datasets (from log files) on the +same figure. + +For the latest help information for the script, run :: + +$ python timeseries.py -h + +The ``timeseries.csh`` script works basically the same way as the Python version, however it +does not include all of the capabilities present in the Python version. + +To use the C-Shell version of the script, :: + +$ ./timeseries.csh /p/work1/turner/CICE_RUNS/conrad_intel_smoke_col_1x1_diag1_run1year.t00/ + diff --git a/doc/source/user_guide/ug_testing.rst b/doc/source/user_guide/ug_testing.rst index aa6804701..c5f5a4e29 100644 --- a/doc/source/user_guide/ug_testing.rst +++ b/doc/source/user_guide/ug_testing.rst @@ -47,6 +47,9 @@ For individual tests, the following command line options can be set ``--testid`` ID specifies the testid. This is required for every use of ``--test`` and ``--suite``. This is a user defined string that will allow each test to have a unique case and run directory name. This is also required. +``--tdir`` PATH + specifies the test directory. Testcases will be created in this directory. (default is .) + ``--mach`` MACHINE (see :ref:`case_options`) ``--env`` ENVIRONMENT1 (see :ref:`case_options`) @@ -295,6 +298,12 @@ results.csh script in the [suite_name].[testid]:: To report the test results, as is required for Pull Requests to be accepted into the master the CICE Consortium code see :ref:`testreporting`. +If using the ``--tdir`` option, that directory must not exist before the script is run. The tdir directory will be +created by the script and it will be populated by all tests as well as scripts that support the +test suite:: + + ./cice.setup --suite base_suite --mach wolf --env gnu --testid myid --tdir /scratch/$user/testsuite.myid + Multiple suites are supported on the command line as comma separated arguments:: ./cice.setup --suite base_suite,decomp_suite --mach wolf --env gnu --testid myid @@ -353,6 +362,9 @@ following options are valid for suites, ``--acct`` ACCOUNT optional +``--tdir`` PATH + optional + ``--testid`` ID required @@ -380,94 +392,103 @@ Test Suite Examples :: ./cice.setup --suite base_suite --mach conrad --env cray --testid v01a - cd base_suite.v01a - #wait for runs to complete + cd testsuite.v01a + # wait for runs to complete + ./results.csh + + 2) **Basic test suite with user defined test directory** + + Specify suite, mach, env, testid, tdir. + :: + + ./cice.setup --suite base_suite --mach conrad --env cray --testid v01a --tdir /scratch/$user/ts.v01a + cd /scratch/$user/ts.v01a + # wait for runs to complete ./results.csh - 2) **Basic test suite on multiple environments** + 3) **Basic test suite on multiple environments** Specify multiple envs. :: ./cice.setup --suite base_suite --mach conrad --env cray,pgi,intel,gnu --testid v01a - cd base_suite.v01a - #wait for runs to complete + cd testsuite.v01a + # wait for runs to complete ./results.csh Each env can be run as a separate invokation of `cice.setup` but if that approach is taken, it is recommended that different testids be used. - 3) **Basic test suite with generate option defined** + 4) **Basic test suite with generate option defined** Add ``--set`` :: ./cice.setup --suite base_suite --mach conrad --env gnu --testid v01b --set diag1 - cd base_suite.v01b - #wait for runs to complete + cd testsuite.v01b + # wait for runs to complete ./results.csh If there are conflicts between the ``--set`` options in the suite and on the command line, the suite will take precedent. - 4) **Multiple test suites from a single command line** + 5) **Multiple test suites from a single command line** Add comma delimited list of suites :: ./cice.setup --suite base_suite,decomp_suite --mach conrad --env gnu --testid v01c - cd base_suite.v01c - #wait for runs to complete + cd testsuite.v01c + # wait for runs to complete ./results.csh - If there are redundant tests in multiple suites, the scripts will understand that and only - create one test. + If there are redundant tests in multiple suites, the scripts will understand that and only + create one test. - 5) **Basic test suite, store baselines in user defined name** + 6) **Basic test suite, store baselines in user defined name** Add ``--bgen`` :: ./cice.setup --suite base_suite --mach conrad --env cray --testid v01a --bgen cice.v01a - cd base_suite.v01a - #wait for runs to complete + cd testsuite.v01a + # wait for runs to complete ./results.csh - This will store the results in the default [bdir] directory under the subdirectory cice.v01a. + This will store the results in the default [bdir] directory under the subdirectory cice.v01a. - 6) **Basic test suite, store baselines in user defined top level directory** + 7) **Basic test suite, store baselines in user defined top level directory** Add ``--bgen`` and ``--bdir`` :: ./cice.setup --suite base_suite --mach conrad --env cray --testid v01a --bgen cice.v01a --bdir /tmp/user/CICE_BASELINES - cd base_suite.v01a - #wait for runs to complete + cd testsuite.v01a + # wait for runs to complete ./results.csh This will store the results in /tmp/user/CICE_BASELINES/cice.v01a. - 7) **Basic test suite, store baselines in auto-generated directory** + 8) **Basic test suite, store baselines in auto-generated directory** Add ``--bgen default`` :: ./cice.setup --suite base_suite --mach conrad --env cray --testid v01a --bgen default - cd base_suite.v01a - #wait for runs to complete + cd testsuite.v01a + # wait for runs to complete ./results.csh - This will store the results in the default [bdir] directory under a directory name generated by the script - that includes the hash and date. + This will store the results in the default [bdir] directory under a directory name generated by the script that includes the hash and date. - 8) **Basic test suite, compare to prior baselines** + 9) **Basic test suite, compare to prior baselines** Add ``--bcmp`` :: ./cice.setup --suite base_suite --mach conrad --env cray --testid v02a --bcmp cice.v01a - cd base_suite.v02a - #wait for runs to complete + cd testsuite.v02a + # wait for runs to complete ./results.csh This will compare to results saved in the baseline [bdir] directory under @@ -477,7 +498,7 @@ Test Suite Examples the CICE Consortium master code. You can use other regression options as well. (``--bdir`` and ``--bgen``) - 9) **Basic test suite, use of default string in regression testing** + 10) **Basic test suite, use of default string in regression testing** default is a special argument to ``--bgen`` and ``--bcmp``. When used, the scripts will automate generation of the directories. In the case of ``--bgen``, @@ -504,7 +525,23 @@ Test Suite Examples When this is invoked, a new set of baselines will be generated and compared to the prior results each time without having to change the arguments. - 10) **Create and test a custom suite** + 11) **Reusing a test suite** + + Add the buildincremental option (``-s buildincremental``). This permits the suite to be rerun without recompiling the whole code. + :: + + ./cice.setup --suite base_suite --mach conrad --env intel --testid v01b --set buildincremental + cd testsuite.v01b + # wait for runs to complete + ./results.csh + # modify code + ./suite.submit # or ./suite.run to run the suite interactively + # wait for runs to complete + ./results.csh + + Only modified files will be recompiled, and the suite will be rerun. + + 12) **Create and test a custom suite** Create your own input text file consisting of 5 columns of data, - Test @@ -526,8 +563,8 @@ Test Suite Examples :: ./cice.setup --suite mysuite --mach conrad --env cray --testid v01a --bgen default - cd mysuite.v01a - #wait for runs to complete + cd testsuite.v01a + # wait for runs to complete ./results.csh You can use all the standard regression testing options (``--bgen``, ``--bcmp``, @@ -557,7 +594,7 @@ To post results, once a test suite is complete, run ``results.csh`` and :: ./cice.setup --suite base_suite --mach conrad --env cray --testid v01a - cd base_suite.v01a + cd testsuite.v01a #wait for runs to complete ./results.csh ./report_results.csh @@ -782,6 +819,7 @@ hemispheres, and must exceed a critical value nominally set to test and the Two-Stage test described in the previous section are provided in :cite:`Hunke18`. +.. _CodeCompliance: Code Compliance Testing Procedure ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -809,7 +847,7 @@ To install the necessary Python packages, the ``pip`` Python utility can be used pip install --user matplotlib To run the compliance test, setup a baseline run with the original baseline model and then -a perturbation run based on recent model changes. Use ``--sets qc`` in both runs in addition +a perturbation run based on recent model changes. Use ``--set qc`` in both runs in addition to other settings needed. Then use the QC script to compare history output, .. code-block:: bash @@ -833,11 +871,17 @@ Implementation notes: 1) Provide a pass/fail on each of the confidence intervals, 2) Facilitate output of a bitmap for each test so that locations of failures can be identified. -The cice.t-test.py requires memory to store multiple two-dimensional fields spanning +The ``cice.t-test.py`` requires memory to store multiple two-dimensional fields spanning 1825 unique timesteps, a total of several GB. An appropriate resource is needed to run the script. If the script runs out of memory on an interactive resource, try logging into a batch resource or finding a large memory node. +The ``cice.t-test.py`` script will also attempt to generate plots of the mean ice thickness +for both the baseline and test cases. Additionally, if the 2-stage test fails then the +script will attempt to plot a map showing the grid cells that failed the test. For a +full list of options, run ``python cice.t-test.py -h``. + + End-To-End Testing Procedure ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -903,51 +947,3 @@ If the regression comparisons fail, then you may want to run the QC test, INFO:__main__:Quality Control Test PASSED -.. _testplotting: - -Test Plotting ----------------- - -The CICE scripts include a script (``timeseries.csh``) that will generate timeseries -figures from a diagnostic output file. -When running a test suite, the ``timeseries.csh`` script is automatically copied to the suite directory. -If the ``timeseries.csh`` script is to be used on a test or case that is not a part of a test suite, -users will need to run the ``timeseries.csh`` script from the tests directory -(``./configuration/scripts/tests/timeseries.csh ./path/``), or copy it to a local directory. -When used with the test suites or given a path, it needs to be run in the directory -above the particular case being plotted, but it can also be run on isolated log files in the same directory, -without a path. - -For example: - -Run the test suite. :: - -$ ./cice.setup -m conrad -e intel --suite base_suite --testid t00 - -Wait for suite to finish then go to the directory. :: - -$ cd base_suite.t00 - -Run the timeseries script on the desired case. :: - -$ ./timeseries.csh /p/work1/turner/CICE_RUNS/conrad_intel_smoke_col_1x1_diag1_run1year.t00/ - -The output figures are placed in the directory where the ``timeseries.csh`` script is run. - -To generate plots for all of the cases within a suite with a testid, create and run a script such as :: - - #!/bin/csh - foreach dir (`ls -1 | grep testid`) - echo $dir - timeseries.csh $dir - end - - -This plotting script can be used to plot the following variables: - - - total ice area (:math:`km^2`) - - total ice extent (:math:`km^2`) - - total ice volume (:math:`m^3`) - - total snow volume (:math:`m^3`) - - RMS ice speed (:math:`m/s`) - diff --git a/doc/source/user_guide/ug_troubleshooting.rst b/doc/source/user_guide/ug_troubleshooting.rst index 7998eef6b..e34d1261b 100644 --- a/doc/source/user_guide/ug_troubleshooting.rst +++ b/doc/source/user_guide/ug_troubleshooting.rst @@ -190,6 +190,9 @@ Known bugs - Latitude and longitude fields in the history output may be wrong when using padding. +- History and restart files will not be written on the first timestep in + some cases. + Interpretation of albedos ---------------------------------------- diff --git a/icepack b/icepack index edf5fbf7c..0b442b3fe 160000 --- a/icepack +++ b/icepack @@ -1 +1 @@ -Subproject commit edf5fbf7c2dc6b6f5b6c4fd180c6ec579ee535e4 +Subproject commit 0b442b3fe96d65b3c54c5facbc69653e1b87b740