Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 7 additions & 13 deletions omp_4.0/Makefile → omp/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ MPI_INC = /opt/local/include/openmpi
MPI_LIB = /opt/local/lib

# Point your mpicc to Clang
CXX = mpicc
CXX ?= mpicc

SOURCES2.0 = \
lulesh.cc \
Expand All @@ -20,30 +20,24 @@ SOURCES2.0 = \
lulesh-init.cc
OBJECTS2.0 = $(SOURCES2.0:.cc=.o)

teams =
teams =
ifdef TEAMS
teams = -DTEAMS=$(TEAMS)
endif

threads =
threads =
ifdef THREADS
threads = -DTHREADS=$(THREADS)
endif

gpu =
mpi=-DUSE_MPI=0
gpu=-DUSE_GPU=1
ifdef USE_GPU
gpu = -DUSE_GPU=$(USE_GPU)
endif

mpi = -DUSE_MPI=0
ifdef USE_MPI
mpi = -DUSE_MPI=$(USE_MPI)
endif

# Tuning flags for Power 8
CXXFLAGS = -mcpu=pwr8 -mtune=pwr8 -fopenmp=libomp -O3 -omptargets=nvptx64sm_35-nvidia-linux $(shared) $(mpi) $(teams) $(threads) $(gpu)

LDFLAGS = -L/usr/local/cuda/nvvm/libdevice
CXXFLAGS= -O3 -fopenmp -fopenmp-targets=nvptx64 -Xopenmp-target=nvptx64 -march=sm_70 -fopenmp-offload-mandatory -foffload-lto $(shared) $(mpi) $(teams) $(threads) $(gpu)

.cc.o: lulesh.h
@echo "Building $<"
Expand All @@ -53,7 +47,7 @@ all: $(LULESH_EXEC)

lulesh2.0: $(OBJECTS2.0)
@echo "Linking"
$(CXX) -fopenmp=libomp -omptargets=nvptx64sm_35-nvidia-linux $(OBJECTS2.0) $(LDFLAGS) -lomp -lomptarget -lstdc++ -lm -o $@
$(CXX) $(CXXFLAGS) $(OBJECTS2.0) $(LDFLAGS) -lm -o $@

clean:
/bin/rm -f *.o *~ *.tgt* $(OBJECTS) $(LULESH_EXEC)
Expand Down
File renamed without changes.
File renamed without changes.
20 changes: 20 additions & 0 deletions omp_4.0/lulesh-init.cc → omp/lulesh-init.cc
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,23 @@ Domain::Domain(Int_t numRanks, Index_t colLoc,
m_numNode = edgeNodes*edgeNodes*edgeNodes ;

m_regNumList = new Index_t[numElem()] ; // material indexset
//

m_dvdx = new Real_t[numElem() * 8];
m_dvdy = new Real_t[numElem() * 8];
m_dvdz = new Real_t[numElem() * 8];

m_x8n = new Real_t[ numElem() * 8 ];
m_y8n = new Real_t[ numElem() * 8 ];
m_z8n = new Real_t[ numElem() * 8 ];

m_determ = new Real_t[numElem()];

m_sigxx = new Real_t[numElem()];
m_sigyy = new Real_t[numElem()];
m_sigzz = new Real_t[numElem()];
m_vnew = new Real_t[numElem()];


// Elem-centered
AllocateElemPersistent(numElem()) ;
Expand Down Expand Up @@ -247,6 +264,9 @@ Domain::SetupThreadSupportStructures()
#else
Index_t numthreads = 1;
#endif
m_fx_elem = new Real_t[numElem() * 8];
m_fy_elem = new Real_t[numElem() * 8];
m_fz_elem = new Real_t[numElem() * 8];

if (numthreads > 1) {
// set up node-centered indexing of elements
Expand Down
2 changes: 1 addition & 1 deletion omp_4.0/lulesh-util.cc → omp/lulesh-util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ void VerifyAndWriteFinalOutput(Real_t elapsed_time,
printf("Run completed: \n");
printf(" Problem size = %i \n", nx);
printf(" MPI tasks = %i \n", numRanks);
printf(" Iteration count = %i \n", locDom.cycle());
printf(" Iteration count = %d \n", locDom.cycle());
printf(" Final Origin Energy = %12.6e \n", locDom.e(ElemId));

Real_t MaxAbsDiff = Real_t(0.0);
Expand Down
File renamed without changes.
Loading