Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 63 additions & 0 deletions kokkos/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# Minimum CMake version for this build; kept in step with Kokkos 4.3.
cmake_minimum_required(VERSION 3.16 FATAL_ERROR) # same as Kokkos 4.3

# User-facing switches (override with -D<NAME>=ON|OFF at configure time).
#   MINIAERO_ENABLE_MPI      - require MPI and build the MPI-enabled code path.
#   MINIAERO_EXTERNAL_KOKKOS - locate an existing Kokkos with find_package()
#                              instead of fetching it during configuration.
option(MINIAERO_ENABLE_MPI "Enable MPI support" ON)
option(MINIAERO_EXTERNAL_KOKKOS "Use out-of-tree Kokkos" OFF)

# The project is miniAero (the name was previously a leftover from miniFE).
project(miniAero LANGUAGES CXX)

# Obtain Kokkos. With MINIAERO_EXTERNAL_KOKKOS=ON an existing installation is
# located through find_package(); otherwise Kokkos 4.3.00 is downloaded and
# added to this build via FetchContent.
if(MINIAERO_EXTERNAL_KOKKOS)
  find_package(Kokkos REQUIRED)
else()
  # needed for View::dimension_0
  # No FORCE: a value supplied by the user on the command line is respected.
  # NOTE(review): Kokkos 4.x appears to spell this option
  # Kokkos_ENABLE_DEPRECATED_CODE_4 - confirm this setting still has an effect.
  set(Kokkos_ENABLE_DEPRECATED_CODE ON CACHE BOOL
      "Enable deprecated Kokkos code (needed for View::dimension_0)")

  include(FetchContent)
  message(STATUS "Fetch, configure, and build Kokkos 4.3.00...")
  FetchContent_Declare(Kokkos
    # HTTPS rather than SSH so the clone works without a GitHub SSH key.
    GIT_REPOSITORY https://github.com/kokkos/kokkos.git
    GIT_TAG 4.3.00
    # Only the tagged revision is needed; skip the rest of the history.
    GIT_SHALLOW TRUE
  )
  FetchContent_MakeAvailable(Kokkos)
endif()

# Look for MPI. The search is mandatory only when the user asked for MPI
# support; otherwise a failed search is tolerated.
set(miniaero_mpi_required "")
if(MINIAERO_ENABLE_MPI)
  set(miniaero_mpi_required REQUIRED)
endif()
# Expands to either find_package(MPI REQUIRED) or plain find_package(MPI).
find_package(MPI ${miniaero_mpi_required})

# Build the miniAero.kokkos executable from every *.C file located next to
# this CMakeLists.txt. CONFIGURE_DEPENDS makes the build re-check the glob so
# that added or removed sources are picked up without a manual reconfigure.
file(GLOB MINIAERO_SRCS CONFIGURE_DEPENDS "${CMAKE_CURRENT_LIST_DIR}/*.C")
set(MINIAERO_INCL_DIRS "${CMAKE_CURRENT_LIST_DIR}")

add_executable(miniAero.kokkos ${MINIAERO_SRCS})
# Every target_link_libraries() call on this target uses the keyword signature;
# CMake rejects mixing keyword and plain signatures on one target.
target_link_libraries(miniAero.kokkos PRIVATE Kokkos::kokkos)
# target_include_directories() accepts a list, so no per-directory loop is needed.
target_include_directories(miniAero.kokkos PRIVATE ${MINIAERO_INCL_DIRS})
target_compile_definitions(miniAero.kokkos PRIVATE ATOMICS_FLUX) # needed for correctness

# MPI is wired in only when it was both requested and found.
if(MPI_FOUND AND MINIAERO_ENABLE_MPI)
  # Report what FindMPI detected, one "NAME = value" line per variable.
  foreach(mpi_var IN ITEMS
      MPI_VERSION
      MPI_CXX_COMPILER
      MPI_CXX_COMPILE_OPTIONS
      MPI_CXX_COMPILE_DEFINITIONS
      MPI_CXX_INCLUDE_DIRS
      MPI_CXX_LINK_FLAGS
      MPI_CXX_LIBRARIES)
    message(STATUS "${mpi_var} = ${${mpi_var}}")
  endforeach()
  # WITH_MPI is the macro defined for the sources when MPI is enabled.
  target_compile_definitions(miniAero.kokkos PRIVATE WITH_MPI=1)
  target_link_libraries(miniAero.kokkos PRIVATE MPI::MPI_CXX)
endif()

# Register the test suite with CTest. Each test runs tests/run_tests.sh from
# inside the tests directory and passes it the path of the built executable;
# the MPI variant passes "MPI" as an additional argument.
enable_testing()

set(miniaero_tests_dir "${CMAKE_CURRENT_LIST_DIR}/tests")

add_test(
  NAME AllTests
  WORKING_DIRECTORY "${miniaero_tests_dir}"
  # $<TARGET_FILE:...> resolves to the real location of the executable, which
  # also holds for multi-config generators and custom output directories.
  COMMAND "${miniaero_tests_dir}/run_tests.sh" $<TARGET_FILE:miniAero.kokkos>
)
# The MPI test is registered only when MPI was both requested and found.
if(MPI_FOUND AND MINIAERO_ENABLE_MPI)
  add_test(
    NAME AllTests-MPI
    WORKING_DIRECTORY "${miniaero_tests_dir}"
    COMMAND "${miniaero_tests_dir}/run_tests.sh" $<TARGET_FILE:miniAero.kokkos> MPI
  )
endif()
8 changes: 4 additions & 4 deletions kokkos/Faces.h
Original file line number Diff line number Diff line change
Expand Up @@ -125,20 +125,20 @@ void copy_faces(Faces<Device> device_faces, std::vector<Face> & mesh_faces){
Kokkos::deep_copy(device_faces.face_tangent_, face_tangent);
Kokkos::deep_copy(device_faces.face_binormal_, face_binormal);

if(device_faces.face_cell_conn_.dimension_0() > 0) {
if(device_faces.face_cell_conn_.extent(0) > 0) {
typedef Kokkos::View<int *, Kokkos::LayoutStride, Device> view_type;
typedef Kokkos::BinOp1D< view_type > CompType;
view_type face_cell_left = Kokkos::subview(device_faces.face_cell_conn_,Kokkos::ALL(),0);

typedef Kokkos::Experimental::MinMax<int,Device> reducer_type;
typedef Kokkos::MinMax<int,Device> reducer_type;
typedef typename reducer_type::value_type minmax_type;
minmax_type minmax;
Kokkos::parallel_reduce(face_cell_left.dimension_0(), KOKKOS_LAMBDA (const int& i, minmax_type& lminmax) {
Kokkos::parallel_reduce(face_cell_left.extent(0), KOKKOS_LAMBDA (const int& i, minmax_type& lminmax) {
if(face_cell_left(i)<lminmax.min_val) lminmax.min_val = face_cell_left(i);
if(face_cell_left(i)>lminmax.max_val) lminmax.max_val = face_cell_left(i);
},reducer_type(minmax));

Kokkos::BinSort<view_type, CompType, Device, int> bin_sort(face_cell_left,CompType(face_cell_left.dimension_0()/2,minmax.min_val,minmax.max_val),true);
Kokkos::BinSort<view_type, CompType, Device, int> bin_sort(face_cell_left,CompType(face_cell_left.extent(0)/2,minmax.min_val,minmax.max_val),true);
bin_sort.create_permute_vector();
Kokkos::deep_copy(device_faces.permute_vector_, bin_sort.sort_order);
}
Expand Down
8 changes: 4 additions & 4 deletions kokkos/GreenGauss.h
Original file line number Diff line number Diff line change
Expand Up @@ -317,24 +317,24 @@ class GreenGauss {
//Sum of all contributions.
green_gauss_gradient_sum<Device> gradient_sum(*cells_, gradients);
Kokkos::parallel_for(mesh_data_->num_owned_cells, gradient_sum);
Device::fence();
Device().fence();
}

//communicate the computed gradient for ghost cells.
void communicate_gradients(gradient_field_type gradients){
//copy values to be send from device to host
extract_shared_tensor<Device, 5, 3> extract_shared_gradients(gradients, mesh_data_->send_local_ids, shared_gradient_vars);//sol_np1_vec, send_local_ids, shared_cells);
Kokkos::parallel_for(mesh_data_->num_ghosts,extract_shared_gradients);
Device::fence();
Device().fence();
Kokkos::deep_copy(shared_gradient_vars_host, shared_gradient_vars);

communicate_ghosted_cell_data(mesh_data_->sendCount, mesh_data_->recvCount, shared_gradient_vars_host.ptr_on_device(),ghosted_gradient_vars_host.ptr_on_device(), 15);
communicate_ghosted_cell_data(mesh_data_->sendCount, mesh_data_->recvCount, shared_gradient_vars_host.data(),ghosted_gradient_vars_host.data(), 15);

//copy values to be sent from host to device
Kokkos::deep_copy(ghosted_gradient_vars, ghosted_gradient_vars_host);
insert_ghost_tensor<Device, 5, 3> insert_ghost_gradients(gradients, mesh_data_->recv_local_ids, ghosted_gradient_vars);
Kokkos::parallel_for(mesh_data_->num_ghosts, insert_ghost_gradients);
Device::fence();
Device().fence();
}

private:
Expand Down
2 changes: 1 addition & 1 deletion kokkos/Parallel3DMesh.h
Original file line number Diff line number Diff line change
Expand Up @@ -436,7 +436,7 @@ class Parallel3DMesh{
Kokkos::View<double*,Device> ghost_volumes("GhostVolumes",total_recv_count);
typename Kokkos::View<double*,Device>::HostMirror host_ghost_volumes = Kokkos::create_mirror_view(ghost_volumes);

communicate_ghosted_cell_data(mesh_data.sendCount, mesh_data.recvCount, host_shared_volumes.ptr_on_device(),host_ghost_volumes.ptr_on_device(), 1);
communicate_ghosted_cell_data(mesh_data.sendCount, mesh_data.recvCount, host_shared_volumes.data(),host_ghost_volumes.data(), 1);

Kokkos::deep_copy(ghost_volumes,host_ghost_volumes);

Expand Down
15 changes: 15 additions & 0 deletions kokkos/README
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ Sections:
---------
I) Introduction
II) Building
II.a) With CMake (suggested)
II.b) With Make
III) Running
IV) Testing

Expand Down Expand Up @@ -32,6 +34,19 @@ MiniAero has minimal dependencies. It directly depends on the
Kokkos library. You can check this out from github:
git clone https://github.com/kokkos/kokkos

MiniAero can be built with CMake (suggested), or Makefiles.

In all cases, the miniAero.kokkos binary is produced.

II.a) With CMake (suggested)

By default, MiniAero fetches Kokkos 4.3.00 as part of the CMake configure step;
pass -DMINIAERO_EXTERNAL_KOKKOS=ON to use an already-installed Kokkos instead.
Options may be passed to the Kokkos build as normal,
e.g. -DKokkos_ENABLE_OpenMP=ON

MPI can be disabled with -DMINIAERO_ENABLE_MPI=OFF

II.b) With Make
MiniAero uses simple Makefiles and builds Kokkos as an
integrated library (i.e. you do not need to pre-install
Kokkos).
Expand Down
26 changes: 13 additions & 13 deletions kokkos/StencilLimiter.h
Original file line number Diff line number Diff line change
Expand Up @@ -551,40 +551,40 @@ class StencilLimiter{
Kokkos::parallel_for(nboundary_faces, bc_min_max);
}

Device::fence();
Device().fence();

gather_min_max<Device> gather(*cells_, stored_min_, stored_max_, stencil_min_, stencil_max_);
Kokkos::parallel_for(mesh_data_->num_owned_cells, gather);
Device::fence();
Device().fence();
}

void communicate_min_max(){

// For min
extract_shared_vector<Device, 5> extract_shared_min(stencil_min_, mesh_data_->send_local_ids, shared_vars);
Kokkos::parallel_for(mesh_data_->num_ghosts, extract_shared_min);
Device::fence();
Device().fence();
Kokkos::deep_copy(shared_vars_host, shared_vars);

communicate_ghosted_cell_data(mesh_data_->sendCount, mesh_data_->recvCount, shared_vars_host.ptr_on_device(),ghosted_vars_host.ptr_on_device(), 5);
communicate_ghosted_cell_data(mesh_data_->sendCount, mesh_data_->recvCount, shared_vars_host.data(),ghosted_vars_host.data(), 5);

Kokkos::deep_copy(ghosted_vars, ghosted_vars_host);
insert_ghost_vector<Device, 5> insert_ghost_min(stencil_min_, mesh_data_->recv_local_ids, ghosted_vars);
Kokkos::parallel_for(mesh_data_->num_ghosts, insert_ghost_min);
Device::fence();
Device().fence();

// For max
extract_shared_vector<Device, 5> extract_shared_max(stencil_max_, mesh_data_->send_local_ids, shared_vars);
Kokkos::parallel_for(mesh_data_->num_ghosts, extract_shared_max);
Device::fence();
Device().fence();
Kokkos::deep_copy(shared_vars_host, shared_vars);

communicate_ghosted_cell_data(mesh_data_->sendCount, mesh_data_->recvCount, shared_vars_host.ptr_on_device(),ghosted_vars_host.ptr_on_device(), 5);
communicate_ghosted_cell_data(mesh_data_->sendCount, mesh_data_->recvCount, shared_vars_host.data(),ghosted_vars_host.data(), 5);

Kokkos::deep_copy(ghosted_vars, ghosted_vars_host);
insert_ghost_vector<Device, 5> insert_ghost_max(stencil_max_, mesh_data_->recv_local_ids, ghosted_vars);
Kokkos::parallel_for(mesh_data_->num_ghosts, insert_ghost_max);
Device::fence();
Device().fence();
// TODO: Maybe combined or overlapped in future.
}

Expand All @@ -608,26 +608,26 @@ class StencilLimiter{
limiter_face<Device, false> limiter_bc(*faces, sol_np1_vec, *cells_, gradients, stencil_min_, stencil_max_, stored_limiter_);
Kokkos::parallel_for(nboundary_faces, limiter_bc);
}
Device::fence();
Device().fence();

gather_limiter<Device> gather(cells_->nfaces_, stored_limiter_, limiter);
Kokkos::parallel_for(mesh_data_->num_owned_cells, gather);
Device::fence();
Device().fence();
}

void communicate_limiter(solution_field_type limiter) {

extract_shared_vector<Device, 5> extract_shared_limiter(limiter, mesh_data_->send_local_ids, shared_vars);
Kokkos::parallel_for(mesh_data_->num_ghosts, extract_shared_limiter);
Device::fence();
Device().fence();
Kokkos::deep_copy(shared_vars_host, shared_vars);

communicate_ghosted_cell_data(mesh_data_->sendCount, mesh_data_->recvCount, shared_vars_host.ptr_on_device(), ghosted_vars_host.ptr_on_device(), 5);
communicate_ghosted_cell_data(mesh_data_->sendCount, mesh_data_->recvCount, shared_vars_host.data(), ghosted_vars_host.data(), 5);

Kokkos::deep_copy(ghosted_vars, ghosted_vars_host);
insert_ghost_vector<Device, 5> insert_ghost_limiter(limiter, mesh_data_->recv_local_ids, ghosted_vars);
Kokkos::parallel_for(mesh_data_->num_ghosts, insert_ghost_limiter);
Device::fence();
Device().fence();
}

private:
Expand Down
34 changes: 17 additions & 17 deletions kokkos/TimeSolverExplicitRK4.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@

// TPL header files
#include <Kokkos_Core.hpp>
#include <impl/Kokkos_Timer.hpp>
#include <Kokkos_Timer.hpp>
#include "MemoryUsage.h"


Expand Down Expand Up @@ -206,7 +206,7 @@ TimeSolverExplicitRK4<Device>::~TimeSolverExplicitRK4()
template <typename Device>
void TimeSolverExplicitRK4<Device>::Solve()
{
Kokkos::Impl::Timer timer;
Kokkos::Timer timer;
//Kokkos Arrays
typedef typename ViewTypes<Device>::scalar_field_type scalar_field_type;
typedef typename ViewTypes<Device>::solution_field_type solution_field_type;
Expand Down Expand Up @@ -335,7 +335,7 @@ void TimeSolverExplicitRK4<Device>::Solve()
copy<Device> copy_solution( sol_n_vec, sol_np1_vec);
Kokkos::parallel_for(nowned_cells, copy_solution);

Device::fence();
Device().fence();

for (ts_data_.time_it = 1; ts_data_.time_it <= ts_data_.max_its; ++ts_data_.time_it)
{
Expand All @@ -354,30 +354,30 @@ void TimeSolverExplicitRK4<Device>::Solve()
//Update temporary solution used to evaluate the residual for this RK stage
update<Device> update_rk_stage(alpha_[irk], res_vec, sol_n_vec, sol_temp_vec);
Kokkos::parallel_for(nowned_cells, update_rk_stage);
Device::fence();
Device().fence();

#ifdef WITH_MPI
// Update ghosted values (using sol_temp_vec since it is used for all residual calculations.)

//copy values to be send from device to host
extract_shared_vector<Device, 5> extract_shared_values(sol_temp_vec, send_local_ids, shared_conserved_vars);
Kokkos::parallel_for(num_ghosts,extract_shared_values);
Device::fence();
Device().fence();
Kokkos::deep_copy(shared_conserved_vars_host, shared_conserved_vars);

communicate_ghosted_cell_data(sendCount, recvCount, shared_conserved_vars_host.ptr_on_device(),ghosted_conserved_vars_host.ptr_on_device(), 5);
communicate_ghosted_cell_data(sendCount, recvCount, shared_conserved_vars_host.data(),ghosted_conserved_vars_host.data(), 5);

//copy values to be sent from host to device
Kokkos::deep_copy(ghosted_conserved_vars, ghosted_conserved_vars_host);
insert_ghost_vector<Device, 5> insert_ghost_values(sol_temp_vec, recv_local_ids, ghosted_conserved_vars);
Kokkos::parallel_for(num_ghosts, insert_ghost_values);
Device::fence();
Device().fence();
#endif

//Zero fluxes
zero_cell_flux<Device> zero_flux(cells);
Kokkos::parallel_for(nowned_cells, zero_flux);
Device::fence();
Device().fence();

//Compute Gradients and Limiters
if(options_.second_order_space || options_.viscous){
Expand Down Expand Up @@ -410,7 +410,7 @@ void TimeSolverExplicitRK4<Device>::Solve()
compute_face_flux<Device, false, roe_flux<Device>, newtonian_viscous_flux<Device> > fluxop(internal_faces, sol_temp_vec, gradients, limiters, cells, inviscid_flux_evaluator, viscous_flux_evaluator);
Kokkos::parallel_for(ninternal_faces,fluxop);
}
Device::fence();
Device().fence();
}
else{
no_viscous_flux<Device> viscous_flux_evaluator;
Expand All @@ -422,7 +422,7 @@ void TimeSolverExplicitRK4<Device>::Solve()
compute_face_flux<Device, false, roe_flux<Device>, no_viscous_flux<Device> > fluxop(internal_faces, sol_temp_vec, gradients, limiters, cells, inviscid_flux_evaluator, viscous_flux_evaluator);
Kokkos::parallel_for(ninternal_faces,fluxop);
}
Device::fence();
Device().fence();
}

//Extrapolated BC fluxes
Expand All @@ -435,7 +435,7 @@ void TimeSolverExplicitRK4<Device>::Solve()
compute_extrapolateBC_flux<Device, roe_flux<Device> > boundary_fluxop(bc_faces, sol_temp_vec, cells, inviscid_flux_evaluator);
Kokkos::parallel_for(nboundary_faces,boundary_fluxop);
}
Device::fence();
Device().fence();

//Tangent BC fluxes
typename std::vector<Faces<Device> >::iterator tf_iter, tf_iter_end;
Expand All @@ -447,7 +447,7 @@ void TimeSolverExplicitRK4<Device>::Solve()
compute_tangentBC_flux<Device, roe_flux<Device> > boundary_fluxop(bc_faces, sol_temp_vec, cells, inviscid_flux_evaluator);
Kokkos::parallel_for(nboundary_faces,boundary_fluxop);
}
Device::fence();
Device().fence();

//Noslip BC fluxes
typename std::vector<Faces<Device> >::iterator if_iter, if_iter_end;
Expand All @@ -460,7 +460,7 @@ void TimeSolverExplicitRK4<Device>::Solve()
compute_NoSlipBC_flux<Device, roe_flux<Device>, newtonian_viscous_flux<Device> > boundary_fluxop(bc_faces, sol_temp_vec, cells, inviscid_flux_evaluator, viscous_flux_evaluator);
Kokkos::parallel_for(nboundary_faces,boundary_fluxop);
}
Device::fence();
Device().fence();

//Inflow BC fluxes
typename std::vector<Faces<Device> >::iterator nsf_iter, nsf_iter_end;
Expand All @@ -472,25 +472,25 @@ void TimeSolverExplicitRK4<Device>::Solve()
compute_inflowBC_flux<Device, roe_flux<Device> > boundary_fluxop(bc_faces, sol_temp_vec, cells, &inflow_state[0], inviscid_flux_evaluator);
Kokkos::parallel_for(nboundary_faces,boundary_fluxop);
}
Device::fence();
Device().fence();

//Sum up all of the contributions
apply_cell_flux<Device> flux_residual(cells, res_vec, ts_data_.dt);
Kokkos::parallel_for(nowned_cells, flux_residual);
Device::fence();
Device().fence();

//Update np1 solution with each stages contribution
update<Device> update_fields(beta_[irk],res_vec,sol_np1_vec,sol_np1_vec);
Kokkos::parallel_for(nowned_cells, update_fields);
Device::fence();
Device().fence();
}
// Update the solution vector after having run all of the RK stages.
copy<Device> copy_solution( sol_np1_vec, sol_n_vec);
Kokkos::parallel_for(nowned_cells, copy_solution);

}

Device::fence();
Device().fence();
if(my_id_==0){
fprintf(stdout,"\n ... Device Run time: %8.2f seconds ...\n", timer.seconds());
}
Expand Down
Loading