Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Graph coloring #84

Open
wants to merge 11 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
out
*.swp
src/*.o
*.o
*.txt
xhpcg
bin/HPCG-Benchmark_3*.txt
bin/xhpcg
bin/hpcg20*.txt
Expand Down
186 changes: 186 additions & 0 deletions PP_MPI_OpenMP/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
# -*- Makefile -*-

# Name of the build configuration; it selects which setup fragment is included.
#arch = Linux_Serial
arch := PP_MPI_OMP

# Path of the per-configuration settings file, derived from $(arch).
setup_file := setup/Make.$(arch)

# Pull the selected settings into this Makefile.
include $(setup_file)


# Base names (no directory, no extension) of every object in HPCG_DEPS.
# The order of this list is the order of the words in HPCG_DEPS.
hpcg_modules := \
	CG \
	CG_ref \
	TestCG \
	ComputeResidual \
	ExchangeHalo \
	GenerateGeometry \
	GenerateProblem \
	GenerateProblem_ref \
	CheckProblem \
	MixedBaseCounter \
	OptimizeProblem \
	ReadHpcgDat \
	ReportResults \
	SetupHalo \
	SetupHalo_ref \
	TestSymmetry \
	TestNorms \
	WriteProblem \
	YAML_Doc \
	YAML_Element \
	ComputeDotProduct \
	ComputeDotProduct_ref \
	mytimer \
	ComputeOptimalShapeXYZ \
	ComputeSPMV \
	ComputeSPMV_ref \
	ComputeSYMGS \
	ComputeSYMGS_ref \
	ComputeWAXPBY \
	ComputeWAXPBY_ref \
	ComputeMG_ref \
	ComputeMG \
	ComputeProlongation_ref \
	ComputeRestriction_ref \
	CheckAspectRatio \
	OutputFile \
	GenerateCoarseProblem \
	init \
	finalize

# Object files for the modules above: src/<name>.o for each base name.
HPCG_DEPS := $(addsuffix .o,$(addprefix src/,$(hpcg_modules)))

# These headers are included in many source files, so every object is
# recompiled whenever one or more of them is modified.
primary_header_names := Geometry SparseMatrix Vector CGData MGData hpcg

# Expand each base name to ../src/<name>.hpp.
PRIMARY_HEADERS := $(patsubst %,../src/%.hpp,$(primary_header_names))

# Default target: build bin/xhpcg, run it locally under mpiexec, then move
# every .txt file found in the current directory into result/HPCG.
#
# RUN_RANKS   - number of MPI processes passed to "mpiexec -n" (default 4).
# RUN_THREADS - value exported as OMP_NUM_THREADS for the run (default 2).
# Both defaults match the values that used to be hard-coded; override them on
# the command line, e.g.  make RUN_RANKS=8 RUN_THREADS=1
#
# To submit through the batch system instead of running locally:
#   sbatch ./bin/job.sh
RUN_RANKS ?= 4
RUN_THREADS ?= 2
result_dir := ./result/HPCG

all: bin/xhpcg
	export OMP_NUM_THREADS=$(RUN_THREADS) && mpiexec -n $(RUN_RANKS) ./bin/xhpcg
	# Make sure the destination exists; without it "mv" of several files fails.
	mkdir -p $(result_dir)
	mv ./*.txt $(result_dir)

# Link the executable from main.o plus every object in HPCG_DEPS.
# $^ is the prerequisite list in the order written (main.o first); $@ is bin/xhpcg.
bin/xhpcg: src/main.o $(HPCG_DEPS)
	$(LINKER) $(LINKFLAGS) $^ $(HPCG_LIBS) -o $@

# "all" and "clean" name actions rather than files, so they are declared phony.
.PHONY: all clean

# Remove the linked executable and every compiled object under src/.
clean:
	rm -f bin/xhpcg src/*.o

# ---------------------------------------------------------------------------
# Compilation rules.
#
# Every object src/<name>.o is compiled from ../src/<name>.cpp and is rebuilt
# when that source, its own header ../src/<name>.hpp (when one is listed), or
# any of $(PRIMARY_HEADERS) changes. Two static pattern rules sharing one
# recipe replace the former one-rule-per-file listing, so an object added to
# HPCG_DEPS automatically gets a compile rule.
# ---------------------------------------------------------------------------

# Objects whose rule lists no matching ../src/<name>.hpp prerequisite.
objs_without_header := src/main.o src/finalize.o src/init.o

# All remaining objects depend on ../src/<name>.hpp as well.
objs_with_header := $(filter-out $(objs_without_header),src/main.o $(HPCG_DEPS))

# Extra per-object compiler flags; empty unless a target overrides it below.
ARCH_CXXFLAGS :=

# ComputeSPMV is the only unit compiled with -march=native. The flag is placed
# before $(CXXFLAGS) in the recipe, so an -march given in CXXFLAGS still wins.
src/ComputeSPMV.o: ARCH_CXXFLAGS := -march=native

# Shared recipe. Kept as a recursive variable so that $< (the .cpp source,
# always the first prerequisite) and $@ are expanded per target at run time.
compile_obj = $(CXX) -c $(ARCH_CXXFLAGS) $(CXXFLAGS) -I../src $< -o $@

$(objs_with_header): src/%.o: ../src/%.cpp ../src/%.hpp $(PRIMARY_HEADERS)
	@mkdir -p $(@D)
	$(compile_obj)

$(objs_without_header): src/%.o: ../src/%.cpp $(PRIMARY_HEADERS)
	@mkdir -p $(@D)
	$(compile_obj)

4 changes: 4 additions & 0 deletions PP_MPI_OpenMP/bin/hpcg.dat
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
HPCG benchmark input file
Sandia National Laboratories; University of Tennessee, Knoxville
96 96 96
64
8 changes: 8 additions & 0 deletions PP_MPI_OpenMP/bin/job.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#!/bin/bash
# Slurm batch script that launches the xhpcg executable under mpiexec.
# %j in the output path is replaced by the job id.
#SBATCH --output=./result/SBATCH/job.%j.out
#SBATCH --job-name=HPCG_g7_SZW
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=64

# OpenMP thread count seen by each launched process.
export OMP_NUM_THREADS=4

# Absolute path of the benchmark executable.
xhpcg_bin=/public1/home/sc81071/CAS_PP2024/sichengtaotao/hpcg/PP_MPI_OpenMP/bin/xhpcg

# NOTE(review): 32 processes x 4 threads = 128 threads while 64 tasks per node
# are requested above - confirm this ratio is intended.
mpiexec -n 32 "$xhpcg_bin"
4 changes: 4 additions & 0 deletions PP_MPI_OpenMP/hpcg.dat
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
HPCG benchmark input file
Sandia National Laboratories; University of Tennessee, Knoxville
96 96 96
64
127 changes: 127 additions & 0 deletions PP_MPI_OpenMP/result/HPCG/HPCG-Benchmark_3.1_2024-06-29_15-10-58.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
HPCG-Benchmark
version=3.1
Release date=March 28, 2019
Machine Summary=
Machine Summary::Distributed Processes=8
Machine Summary::Threads per processes=8
Global Problem Dimensions=
Global Problem Dimensions::Global nx=32
Global Problem Dimensions::Global ny=32
Global Problem Dimensions::Global nz=32
Processor Dimensions=
Processor Dimensions::npx=2
Processor Dimensions::npy=2
Processor Dimensions::npz=2
Local Domain Dimensions=
Local Domain Dimensions::nx=16
Local Domain Dimensions::ny=16
Local Domain Dimensions::Lower ipz=0
Local Domain Dimensions::Upper ipz=1
Local Domain Dimensions::nz=16
########## Problem Summary ##########=
Setup Information=
Setup Information::Setup Time=0.0218346
Linear System Information=
Linear System Information::Number of Equations=32768
Linear System Information::Number of Nonzero Terms=830584
Multigrid Information=
Multigrid Information::Number of coarse grid levels=3
Multigrid Information::Coarse Grids=
Multigrid Information::Coarse Grids::Grid Level=1
Multigrid Information::Coarse Grids::Number of Equations=4096
Multigrid Information::Coarse Grids::Number of Nonzero Terms=97336
Multigrid Information::Coarse Grids::Number of Presmoother Steps=1
Multigrid Information::Coarse Grids::Number of Postsmoother Steps=1
Multigrid Information::Coarse Grids::Grid Level=2
Multigrid Information::Coarse Grids::Number of Equations=512
Multigrid Information::Coarse Grids::Number of Nonzero Terms=10648
Multigrid Information::Coarse Grids::Number of Presmoother Steps=1
Multigrid Information::Coarse Grids::Number of Postsmoother Steps=1
Multigrid Information::Coarse Grids::Grid Level=3
Multigrid Information::Coarse Grids::Number of Equations=64
Multigrid Information::Coarse Grids::Number of Nonzero Terms=1000
Multigrid Information::Coarse Grids::Number of Presmoother Steps=1
Multigrid Information::Coarse Grids::Number of Postsmoother Steps=1
########## Memory Use Summary ##########=
Memory Use Information=
Memory Use Information::Total memory used for data (Gbytes)=0.0235669
Memory Use Information::Memory used for OptimizeProblem data (Gbytes)=0
Memory Use Information::Bytes per equation (Total memory / Number of Equations)=719.205
Memory Use Information::Memory used for linear system and CG (Gbytes)=0.0207158
Memory Use Information::Coarse Grids=
Memory Use Information::Coarse Grids::Grid Level=1
Memory Use Information::Coarse Grids::Memory used=0.00249264
Memory Use Information::Coarse Grids::Grid Level=2
Memory Use Information::Coarse Grids::Memory used=0.000316812
Memory Use Information::Coarse Grids::Grid Level=3
Memory Use Information::Coarse Grids::Memory used=4.1684e-05
########## V&V Testing Summary ##########=
Spectral Convergence Tests=
Spectral Convergence Tests::Result=PASSED
Spectral Convergence Tests::Unpreconditioned=
Spectral Convergence Tests::Unpreconditioned::Maximum iteration count=11
Spectral Convergence Tests::Unpreconditioned::Expected iteration count=12
Spectral Convergence Tests::Preconditioned=
Spectral Convergence Tests::Preconditioned::Maximum iteration count=2
Spectral Convergence Tests::Preconditioned::Expected iteration count=2
Departure from Symmetry |x'Ay-y'Ax|/(2*||x||*||A||*||y||)/epsilon=
Departure from Symmetry |x'Ay-y'Ax|/(2*||x||*||A||*||y||)/epsilon::Result=PASSED
Departure from Symmetry |x'Ay-y'Ax|/(2*||x||*||A||*||y||)/epsilon::Departure for SpMV=0
Departure from Symmetry |x'Ay-y'Ax|/(2*||x||*||A||*||y||)/epsilon::Departure for MG=0
########## Iterations Summary ##########=
Iteration Count Information=
Iteration Count Information::Result=PASSED
Iteration Count Information::Reference CG iterations per set=50
Iteration Count Information::Optimized CG iterations per set=51
Iteration Count Information::Total number of reference iterations=50
Iteration Count Information::Total number of optimized iterations=51
########## Reproducibility Summary ##########=
Reproducibility Information=
Reproducibility Information::Result=PASSED
Reproducibility Information::Scaled residual mean=7.57053e-20
Reproducibility Information::Scaled residual variance=0
########## Performance Summary (times in sec) ##########=
Benchmark Time Summary=
Benchmark Time Summary::Optimization phase=1.05e-07
Benchmark Time Summary::DDOT=0.0313035
Benchmark Time Summary::WAXPBY=0.00405827
Benchmark Time Summary::SpMV=0.00620048
Benchmark Time Summary::MG=0.0666923
Benchmark Time Summary::Total=0.108292
Floating Point Operations Summary=
Floating Point Operations Summary::Raw DDOT=1.00925e+07
Floating Point Operations Summary::Raw WAXPBY=1.00925e+07
Floating Point Operations Summary::Raw SpMV=8.63807e+07
Floating Point Operations Summary::Raw MG=4.78874e+08
Floating Point Operations Summary::Total=5.8544e+08
Floating Point Operations Summary::Total with convergence overhead=5.7396e+08
GB/s Summary=
GB/s Summary::Raw Read B/W=33.358
GB/s Summary::Raw Write B/W=7.71199
GB/s Summary::Raw Total B/W=41.0699
GB/s Summary::Total with convergence and optimization phase overhead=39.4689
GFLOP/s Summary=
GFLOP/s Summary::Raw DDOT=0.322409
GFLOP/s Summary::Raw WAXPBY=2.48691
GFLOP/s Summary::Raw SpMV=13.9313
GFLOP/s Summary::Raw MG=7.18035
GFLOP/s Summary::Raw Total=5.4061
GFLOP/s Summary::Total with convergence overhead=5.3001
GFLOP/s Summary::Total with convergence and optimization phase overhead=5.19535
User Optimization Overheads=
User Optimization Overheads::Optimization phase time (sec)=1.05e-07
User Optimization Overheads::Optimization phase time vs reference SpMV+MG time=2.68842e-05
DDOT Timing Variations=
DDOT Timing Variations::Min DDOT MPI_Allreduce time=0.00153632
DDOT Timing Variations::Max DDOT MPI_Allreduce time=0.0267875
DDOT Timing Variations::Avg DDOT MPI_Allreduce time=0.0142139
Final Summary=
Final Summary::HPCG result is VALID with a GFLOP/s rating of=5.19535
Final Summary::HPCG 2.4 rating for historical reasons is=5.3001
Final Summary::Reference version of ComputeDotProduct used=Performance results are most likely suboptimal
Final Summary::Reference version of ComputeSPMV used=Performance results are most likely suboptimal
Final Summary::Reference version of ComputeMG used and number of threads greater than 1=Performance results are severely suboptimal
Final Summary::Reference version of ComputeWAXPBY used=Performance results are most likely suboptimal
Final Summary::Results are valid but execution time (sec) is=0.108292
Final Summary::You have selected the QuickPath option=Results are official for legacy installed systems with confirmation from the HPCG Benchmark leaders.
Final Summary::After confirmation please upload results from the YAML file contents to=http://hpcg-benchmark.org
11 changes: 11 additions & 0 deletions PP_MPI_OpenMP/result/HPCG/hpcg20240629T151058.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
WARNING: PERFORMING UNPRECONDITIONED ITERATIONS
Call [0] Number of Iterations [11] Scaled Residual [1.08807e-14]
WARNING: PERFORMING UNPRECONDITIONED ITERATIONS
Call [1] Number of Iterations [11] Scaled Residual [1.08946e-14]
Call [0] Number of Iterations [2] Scaled Residual [7.51788e-17]
Call [1] Number of Iterations [2] Scaled Residual [7.51788e-17]
Departure from symmetry (scaled) for SpMV abs(x'*A*y - y'*A*x) = 0
Departure from symmetry (scaled) for MG abs(x'*Minv*y - y'*Minv*x) = 0
SpMV call [0] Residual [0]
SpMV call [1] Residual [0]
Call [0] Scaled Residual [7.57053e-20]
Empty file.
Empty file.
Empty file.
Empty file.
Loading