Skip to content

Commit

Permalink
Snapshot of kokkos-kernels.git from commit 6e8e97a977564673bdadc15085…
Browse files Browse the repository at this point in the history
…d27a6a56b98114

From repository at [email protected]:kokkos/kokkos-kernels.git

At commit:
commit 6e8e97a977564673bdadc15085d27a6a56b98114
Merge: 00b1648 f81778c
Author: Nathan Ellingwood <[email protected]>
Date:   Wed Mar 7 16:29:28 2018 -0700

    Merge branch 'develop' for 2.6.00

    Part of Kokkos C++ Performance Portability Programming EcoSystem 2.6
  • Loading branch information
ndellingwood committed Mar 7, 2018
1 parent dee20c5 commit 35f8c36
Show file tree
Hide file tree
Showing 185 changed files with 17,198 additions and 11,498 deletions.
9 changes: 0 additions & 9 deletions packages/kokkos-kernels/.gitignore

This file was deleted.

14 changes: 14 additions & 0 deletions packages/kokkos-kernels/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,19 @@
# Change Log

## [2.6.00](https://github.com/kokkos/kokkos-kernels/tree/2.6.00) (2018-03-07)
[Full Changelog](https://github.com/kokkos/kokkos-kernels/compare/2.5.00...2.6.00)

**Implemented enhancements:**

- Spgemm hash promotion [\#171](https://github.com/kokkos/kokkos-kernels/issues/171)
- Batched BLAS enhancement [\#170](https://github.com/kokkos/kokkos-kernels/issues/170)

**Fixed bugs:**

- d2\_graph\_color doesn't have a default coloring mechanism [\#168](https://github.com/kokkos/kokkos-kernels/issues/168)
- Build error when MKL TPL is enabled [\#135](https://github.com/kokkos/kokkos-kernels/issues/135)


## [2.5.00](https://github.com/kokkos/kokkos-kernels/tree/2.5.00) (2017-12-15)
[Full Changelog](https://github.com/kokkos/kokkos-kernels/compare/0.10.03...2.5.00)

Expand Down
2 changes: 2 additions & 0 deletions packages/kokkos-kernels/Makefile.kokkos-kernels
Original file line number Diff line number Diff line change
Expand Up @@ -389,8 +389,10 @@ endif
KOKKOSKERNELS_INTERNAL_HEADERS = $(wildcard ${KOKKOSKERNELS_PATH}/src/impl/*.hpp)
KOKKOSKERNELS_INTERNAL_HEADERS += $(wildcard ${KOKKOSKERNELS_PATH}/src/impl/generated_specializations_hpp/*/*eti_spec*.hpp)

ifdef KOKKOSKERNELS_INTERNAL_SRC_SPARSE
vpath %.cpp $(sort $(dir $(KOKKOSKERNELS_INTERNAL_SRC_SPARSE)))
vpath %.cpp $(sort $(dir $(KOKKOSKERNELS_INTERNAL_SRC_BLAS)))
endif

DEPFLAGS = -M

Expand Down
30 changes: 30 additions & 0 deletions packages/kokkos-kernels/README
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,36 @@ Building Kokkoskernels
1. Modify example/buildlib/compileKokkosKernelsSimple.sh or
example/buildlib/compileKokkosKernels.sh for your environment
and run it to generate the required makefiles.
- KOKKOS_DEVICES can be set as shown below. You can remove any backend
that you don't need. If the Cuda backend is used, the CXX compiler should point to ${KOKKOS_PATH}/config/nvcc_wrapper.
If you enable Cuda, a host backend (either OpenMP or Serial) should also be enabled.
KOKKOS_DEVICES=OpenMP,Serial,Cuda

- For the best performance, set the architecture flag to match your target hardware,
e.g. KOKKOS_ARCHS=KNL for KNLs or KOKKOS_ARCHS=HSW for Haswell.
If you compile for P100 GPUs with a Power8 processor, give both architectures:
KOKKOS_ARCHS=Pascal60,Power8

To list the available architecture flags, run the command below.
%: scripts/generate_makefile.bash --help
2. Run "make build-test" to compile the tests.


Using Kokkoskernels Test Drivers
--------------------------

In perf_test there are test drivers.

-- KokkosGraph_triangle.exe : Triangle counting driver.
-- KokkosSparse_spgemm.exe : Sparse Matrix Sparse Matrix Multiply:
*****NOTE: KKMEM is outdated. Use default algorithm: KKSPGEMM = KKDEFAULT = DEFAULT****
Or within the code:
kh.create_spgemm_handle(KokkosSparse::SPGEMM_KK);
-- KokkosSparse_spmv.exe : Sparse matvec.
-- KokkosSparse_pcg.exe: CG method with Gauss Seidel as preconditioner.
-- KokkosGraph_color.exe: Distance-1 Graph coloring
-- KokkosKernels_MatrixConverter.exe: given a matrix in Matrix Market format, converts it to the ".bin"
binary format for fast input/output, which can be read by the other test drivers.

Please report bugs or performance issues to: https://github.com/kokkos/kokkos-kernels/issues

54 changes: 54 additions & 0 deletions packages/kokkos-kernels/example/wiki/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Common build rules for the Kokkos Kernels wiki examples. It builds every
# source listed in SRC into a single executable, $(EXE).

# Locations of the Kokkos and Kokkos Kernels trees. `?=` keeps any value that
# was already set by the environment, the command line or an including Makefile.
KOKKOS_PATH ?= ${HOME}/Kokkos/kokkos
KOKKOSKERNELS_PATH ?= ${HOME}/Kokkos/kokkos-kernels

# Turn off ETI
KOKKOSKERNELS_SCALARS =

# Sources and headers default to everything in the current directory unless
# SRC / HDR were set before this file is read.
SRC ?= $(wildcard *.cpp)
HDR ?= $(wildcard *.hpp)

# Backend, architecture and Cuda option selection.
# NOTE(review): presumably consumed by Makefile.kokkos, which is included
# further down -- confirm.
KOKKOS_DEVICES=OpenMP
KOKKOS_ARCH = "SNB,Kepler35"
KOKKOS_CUDA_OPTIONS=enable_lambda

# First target in the file, hence the default goal; it depends on `build`.
# NOTE(review): the echo is the recipe of `default`, so make runs it after the
# `build` prerequisite has been brought up to date, despite the message text.
default: build
echo "Start Build"

# Use the nvcc wrapper shipped with Kokkos when "Cuda" appears in
# KOKKOS_DEVICES; otherwise compile with g++.
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = ${KOKKOS_PATH}/config/nvcc_wrapper
else
CXX = g++
endif

# The compiler driver is also used for linking.
LINK = ${CXX}

# `override` makes sure -I./ is appended even when CXXFLAGS is given on the
# make command line.
CXXFLAGS = -O3 -g
override CXXFLAGS += -I./
LINKFLAGS =

EXE = test.x
DEPFLAGS = -M

# Let make find .cpp prerequisites in every directory that holds a source.
vpath %.cpp $(sort $(dir $(SRC)))

# Object names have their directory part stripped.
OBJ = $(notdir $(SRC:.cpp=.o))
LIB =

include $(KOKKOS_PATH)/Makefile.kokkos
include ${KOKKOSKERNELS_PATH}/Makefile.kokkos-kernels

# Diagnostic: prints the object list, executable name and source directories
# each time this Makefile is parsed.
$(warning $(OBJ) $(EXE) $(sort $(dir $(SRC))))

build: $(EXE)

# Link step: all objects plus the Kokkos / Kokkos Kernels link flags and libraries.
$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) $(KOKKOSKERNELS_LINK_DEPENDS)
$(LINK) $(KOKKOS_LDFLAGS) $(KOKKOSKERNELS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(KOKKOSKERNELS_LIBS) $(LIB) -o $(EXE)

# Removes local build products after running the kokkos-clean target
# (not defined in this file).
clean: kokkos-clean
rm -f *.o *.cuda *.host

# Compilation rules
# Every object depends on all headers in HDR, so touching any header rebuilds
# all objects. The output goes to $(notdir $@), i.e. the target's file name
# with any directory part removed.

%.o:%.cpp $(KOKKOS_CPP_DEPENDS) $(HDR)
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(KOKKOSKERNELS_CPPFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $(notdir $@)

14 changes: 14 additions & 0 deletions packages/kokkos-kernels/example/wiki/blas/abs/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Build file for the KokkosBlas abs example: sets paths and source lists, then
# hands over to the common wiki Makefile.

# Absolute path of this Makefile with the literal text "Makefile" removed,
# leaving the containing directory including its trailing slash.
# NOTE(review): $(subst) removes every occurrence of "Makefile" in the path,
# not only the file name -- a directory whose name contains it would break this.
MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST))))

# Default to the Kokkos Kernels root four directory levels above this one.
ifndef KOKKOSKERNELS_PATH
KOKKOSKERNELS_PATH = $(MAKEFILE_PATH)../../../..
endif
# Default to a `kokkos` directory that is a sibling of the Kokkos Kernels root.
ifndef KOKKOS_PATH
KOKKOS_PATH = $(KOKKOSKERNELS_PATH)/../kokkos
endif

# All sources and headers that live next to this Makefile.
SRC = $(wildcard $(MAKEFILE_PATH)*.cpp)
HDR = $(wildcard $(MAKEFILE_PATH)*.hpp)

include $(KOKKOSKERNELS_PATH)/example/wiki/Makefile

24 changes: 24 additions & 0 deletions packages/kokkos-kernels/example/wiki/blas/abs/abs.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#include<Kokkos_Core.hpp>
#include<KokkosBlas1_abs.hpp>

// Example driver for KokkosBlas::abs.
//
// Usage: <exe> N
//
// Fills a length-N vector x with -1.0, computes y = abs(x) with
// KokkosBlas::abs, sums the entries of y and prints the sum next to the
// expected value N.
//
// Returns 0 on success and 1 when N is missing or negative.
int main(int argc, char* argv[]) {
  // Validate the command line before touching Kokkos so that a bad invocation
  // neither reads past argv nor leaves Kokkos initialized.
  if (argc < 2) {
    printf("Usage: %s N\n", argc > 0 ? argv[0] : "abs");
    return 1;
  }

  const int N = atoi(argv[1]);
  if (N < 0) {
    printf("N must be non-negative, got %d\n", N);
    return 1;
  }

  Kokkos::initialize();

  // The Views live in their own scope: they have to be destroyed before
  // Kokkos::finalize() is called.
  {
    Kokkos::View<double*> x("X",N);
    Kokkos::View<double*> y("Y",N);
    Kokkos::deep_copy(x,-1.0);

    KokkosBlas::abs(y,x);

    double sum = 0.0;
    Kokkos::parallel_reduce("CheckValue", N, KOKKOS_LAMBDA (const int& i, double& lsum) {
      lsum += y(i);
    },sum);

    printf("Sum: %lf Expected: %lf Diff: %e\n",sum,1.0*N,sum-1.0*N);
  }

  Kokkos::finalize();
  return 0;
}

1 change: 1 addition & 0 deletions packages/kokkos-kernels/master_history.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
tag: 0.10.00 date: 08/22/2017 master: xxxxxxxx develop: e323bcb0
tag: 0.10.03 date: 09/11/2017 master: e323bcb0 develop: 4cb87390
tag: 2.5.00 date: 12/15/2017 master: e4c645e9 develop: 04d58766
tag: 2.6.00 date: 03/07/2018 master: 00b16484 develop: f81778ce

24 changes: 22 additions & 2 deletions packages/kokkos-kernels/perf_test/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -77,11 +77,31 @@ EXECUTABLES += $(wildcard ${KOKKOSKERNELS_SRC_PATH}/perf_test/sparse/*cpp)
EXECUTABLES += $(wildcard ${KOKKOSKERNELS_SRC_PATH}/perf_test/graph/*cpp)
EXECUTABLES += $(wildcard ${KOKKOSKERNELS_SRC_PATH}/perf_test/common/*cpp)



#=======================================================================
#===================== BATCHED BLAS/LAPACK PERF ========================
#=======================================================================

BATCHEDINC = -I$(KOKKOSKERNELS_SRC_PATH)/perf_test/batched
INC += $(BATCHEDINC)

BATCHED_HEADERS = $(wildcard $(KOKKOSKERNELS_SRC_PATH)/perf_test/batched/*.hpp)
TEST_HEADERS += $(BATCHED_HEADERS)

BATCHED_EXECUTABLES = $(wildcard ${KOKKOSKERNELS_SRC_PATH}/perf_test/batched/*cpp)
EXECUTABLES += $(BATCHED_EXECUTABLES)

#=======================================================================
#===================== TEST TARGETS ====================================
#=======================================================================

vpath %.cpp $(sort $(dir $(EXECUTABLES)))
OBJS = $(notdir $(EXECUTABLES:.cpp=.o))
TEST_TARGETS = $(notdir $(EXECUTABLES:.cpp=.exe))

#TEST_TARGETS = $(patsubst %.cpp, %.exe, $(EXECUTABLES))


#=======================================================================
#=================== Make Rules ========================================
#=======================================================================
Expand All @@ -93,7 +113,7 @@ clean: kokkos-clean kokkoskernels-clean
default: $(TEST_TARGETS)

build: $(TEST_TARGETS)

%.exe:%.o $(KOKKOS_LINK_DEPENDS) $(KOKKOSKERNELS_LINK_DEPENDS) $(TEST_HEADERS)
$(LINK) $(EXTRA_PATH) $< $(KOKKOSKERNELS_LDFLAGS) $(KOKKOSKERNELS_LIBS) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o $@

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
/* Implementation for testing KokkosKernels on BCRS operations
- block-tridiagonal factorization
- block-tridiagonal solve
- bcrs matvec
StructuredBlock represents a 3D mesh having ni, nj, nk cells in each
dimension. Variable ordering is such that the k index is the fastest and the
i index is slowest. Smoothing lines are built in the k direction.
BlockCrsMatrix is a simple block CRS data structure.
BlockTridiagMatrices holds the block tridiagonal matrices.
An example run is
./driver -ni 32 -nj 32 -nk 128 -bs 5 -c
This runs a sequence of unit tests, then runs a problem having a 32x32x128
structured block with the lines oriented along the third dimension (line
length = 128). The block size is 5. -c adds a somewhat expensive check of the
answer. It's good to run with -c once in a while, but the cheap unit tests
that always run before the big problem already provide good coverage.
*/

#include "Kokkos_Core.hpp"
#include "impl/Kokkos_Timer.hpp"

#if defined(KOKKOS_ENABLE_CUDA)
#define __KOKKOSBATCHED_TEST_ENABLE_CUDA__

#include "KokkosBatched_Util.hpp"

// Compile-time choice of the batched algorithm variant. Exactly one of the two
// macros below is meant to be active; the unblocked variant is selected here
// and the blocked one is left commented out.
// NOTE(review): defining both macros would declare each Algo* typedef twice
// with different types -- confirm only one is ever enabled.
#define KOKKOSBATCHED_USE_UNBLOCKED_ALGO 1
//#define KOKKOSBATCHED_USE_BLOCKED_ALGO 1

// Unblocked algorithm tags for LU, Trsm, Gemm, Trsv and Gemv.
// NOTE(review): these aliases are presumably consumed by
// KokkosBatched_Test_BlockCrs.hpp, which is included after this section --
// confirm.
#if defined (KOKKOSBATCHED_USE_UNBLOCKED_ALGO)
typedef KokkosBatched::Experimental::Algo::LU::Unblocked AlgoLU;
typedef KokkosBatched::Experimental::Algo::Trsm::Unblocked AlgoTrsm;
typedef KokkosBatched::Experimental::Algo::Gemm::Unblocked AlgoGemm;

typedef KokkosBatched::Experimental::Algo::Trsv::Unblocked AlgoTrsv;
typedef KokkosBatched::Experimental::Algo::Gemv::Unblocked AlgoGemv;
#endif
// Blocked algorithm tags for the same five operations.
#if defined (KOKKOSBATCHED_USE_BLOCKED_ALGO)
typedef KokkosBatched::Experimental::Algo::LU::Blocked AlgoLU;
typedef KokkosBatched::Experimental::Algo::Trsm::Blocked AlgoTrsm;
typedef KokkosBatched::Experimental::Algo::Gemm::Blocked AlgoGemm;

typedef KokkosBatched::Experimental::Algo::Trsv::Blocked AlgoTrsv;
typedef KokkosBatched::Experimental::Algo::Gemv::Blocked AlgoGemv;
#endif

#include "KokkosBatched_Test_BlockCrs.hpp"

using namespace KokkosBatched;

int main (int argc, char *argv[]) {
Kokkos::initialize(argc, argv);

typedef Kokkos::DefaultHostExecutionSpace HostSpaceType;
typedef Kokkos::DefaultExecutionSpace DeviceSpaceType;

const bool detail = false;

Kokkos::print_configuration(std::cout, detail);

enum : int { VectorLength = DefaultVectorLength<Test::scalar_type,typename DeviceSpaceType::memory_space>::value,
RangeTagOper = 0,
TeamTagOper = 1 };

// Unit tests
bool profile = false;
for (int i=1;i<argc;++i) {
const std::string& token = argv[i];
if (strncmp(token.c_str(), "-profile", 8) == 0) profile = true;
}


if (!profile) {
// std::cout << " Unit Test::Range :: Begin\n";
// {
// Test::run<DeviceSpaceType,Test::scalar_type,VectorLength,RangeTagOper>( 3, 4, 2, 25, 2);
// Test::run<DeviceSpaceType,Test::scalar_type,VectorLength,RangeTagOper>(44, 63, 15, 4, 1);
// Test::run<DeviceSpaceType,Test::scalar_type,VectorLength,RangeTagOper>( 2, 2, 15, 3, 3);
// Test::run<DeviceSpaceType,Test::scalar_type,VectorLength,RangeTagOper>( 1, 1, 2, 63, 8);

// for (int nrhs=1;nrhs<=33;++nrhs)
// Test::run<DeviceSpaceType,Test::scalar_type,VectorLength,RangeTagOper>(2, 2, 15, 3, nrhs);
// }
// std::cout << " Unit Test::Range :: End\n";

std::cout << " Unit Test::Team :: Begin\n";
{
Test::run<DeviceSpaceType,Test::scalar_type,VectorLength,TeamTagOper>( 3, 4, 2, 25, 2);
Test::run<DeviceSpaceType,Test::scalar_type,VectorLength,TeamTagOper>(44, 63, 15, 4, 1);
Test::run<DeviceSpaceType,Test::scalar_type,VectorLength,TeamTagOper>( 2, 2, 15, 3, 3);
Test::run<DeviceSpaceType,Test::scalar_type,VectorLength,TeamTagOper>( 1, 1, 2, 63, 8);

for (int nrhs=1;nrhs<=33;++nrhs)
Test::run<DeviceSpaceType,Test::scalar_type,VectorLength,TeamTagOper>(2, 2, 15, 3, nrhs);
}
std::cout << " Unit Test::Team :: End\n";
}

// Performance tests
std::cout << " Perf Test:: Begin\n";
{
const Test::Input<DeviceSpaceType> input(argc, argv);
Test::run<DeviceSpaceType,Test::scalar_type,VectorLength>(input);
}
std::cout << " Perf Test:: End\n";

Kokkos::finalize();

return 0;
}
#else

// Fallback entry point compiled when KOKKOS_ENABLE_CUDA is not defined:
// reports that Kokkos::Cuda is unavailable and returns -1.
int main(int /*argc*/, char * /*argv*/[]) {
  const int status = -1;
  std::cout << "Kokkos::Cuda is not enabled\n";
  return status;
}

#endif
Loading

0 comments on commit 35f8c36

Please sign in to comment.