diff --git a/kokkos/CMakeLists.txt b/kokkos/CMakeLists.txt new file mode 100644 index 0000000..6cc6825 --- /dev/null +++ b/kokkos/CMakeLists.txt @@ -0,0 +1,63 @@ +cmake_minimum_required(VERSION 3.16 FATAL_ERROR) # same as Kokkos 4.3 + +option(MINIAERO_ENABLE_MPI "Enable MPI support" ON) +option(MINIAERO_EXTERNAL_KOKKOS "Use out-of-tree Kokkos" OFF) + +project(miniAero LANGUAGES CXX) + +# needed for View::dimension_0 +if (MINIAERO_EXTERNAL_KOKKOS) + find_package(Kokkos REQUIRED) +else() + set(Kokkos_ENABLE_DEPRECATED_CODE ON CACHE BOOL "" FORCE) + include(FetchContent) + message(STATUS "Fetch, configure, and build Kokkos 4.3.00...") + FetchContent_Declare(Kokkos + GIT_REPOSITORY https://github.com/kokkos/kokkos.git # HTTPS: anonymous clone works without SSH keys + GIT_TAG 4.3.00 + ) + FetchContent_MakeAvailable(Kokkos) +endif() + +if(MINIAERO_ENABLE_MPI) + find_package(MPI REQUIRED) +else() + find_package(MPI) # okay if we can't find MPI if the user didn't ask for it +endif() + +file(GLOB MINIAERO_SRCS "*.C") +set(MINIAERO_INCL_DIRS ${CMAKE_CURRENT_LIST_DIR}) + +add_executable(miniAero.kokkos ${MINIAERO_SRCS}) +target_link_libraries(miniAero.kokkos Kokkos::kokkos) +foreach(INCL_DIR ${MINIAERO_INCL_DIRS}) + target_include_directories(miniAero.kokkos PRIVATE ${INCL_DIR}) +endforeach() +target_compile_definitions(miniAero.kokkos PRIVATE ATOMICS_FLUX) # needed for correctness + +if (MPI_FOUND AND MINIAERO_ENABLE_MPI) + message(STATUS "MPI_VERSION = ${MPI_VERSION}") + message(STATUS "MPI_CXX_COMPILER = ${MPI_CXX_COMPILER}") + message(STATUS "MPI_CXX_COMPILE_OPTIONS = ${MPI_CXX_COMPILE_OPTIONS}") + message(STATUS "MPI_CXX_COMPILE_DEFINITIONS = ${MPI_CXX_COMPILE_DEFINITIONS}") + message(STATUS "MPI_CXX_INCLUDE_DIRS = ${MPI_CXX_INCLUDE_DIRS}") + message(STATUS "MPI_CXX_LINK_FLAGS = ${MPI_CXX_LINK_FLAGS}") + message(STATUS "MPI_CXX_LIBRARIES = ${MPI_CXX_LIBRARIES}") + target_compile_definitions(miniAero.kokkos PRIVATE WITH_MPI=1) + target_link_libraries(miniAero.kokkos MPI::MPI_CXX) +endif() + +enable_testing() + +add_test( + 
NAME AllTests + WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/tests + COMMAND run_tests.sh ${CMAKE_CURRENT_BINARY_DIR}/miniAero.kokkos +) +if (MPI_FOUND AND MINIAERO_ENABLE_MPI) + add_test( + NAME AllTests-MPI + WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/tests + COMMAND run_tests.sh ${CMAKE_CURRENT_BINARY_DIR}/miniAero.kokkos MPI + ) +endif() \ No newline at end of file diff --git a/kokkos/Faces.h b/kokkos/Faces.h index feb1a6d..0bd50ae 100644 --- a/kokkos/Faces.h +++ b/kokkos/Faces.h @@ -125,20 +125,20 @@ void copy_faces(Faces device_faces, std::vector & mesh_faces){ Kokkos::deep_copy(device_faces.face_tangent_, face_tangent); Kokkos::deep_copy(device_faces.face_binormal_, face_binormal); - if(device_faces.face_cell_conn_.dimension_0() > 0) { + if(device_faces.face_cell_conn_.extent(0) > 0) { typedef Kokkos::View view_type; typedef Kokkos::BinOp1D< view_type > CompType; view_type face_cell_left = Kokkos::subview(device_faces.face_cell_conn_,Kokkos::ALL(),0); - typedef Kokkos::Experimental::MinMax reducer_type; + typedef Kokkos::MinMax reducer_type; typedef typename reducer_type::value_type minmax_type; minmax_type minmax; - Kokkos::parallel_reduce(face_cell_left.dimension_0(), KOKKOS_LAMBDA (const int& i, minmax_type& lminmax) { + Kokkos::parallel_reduce(face_cell_left.extent(0), KOKKOS_LAMBDA (const int& i, minmax_type& lminmax) { if(face_cell_left(i)lminmax.max_val) lminmax.max_val = face_cell_left(i); },reducer_type(minmax)); - Kokkos::BinSort bin_sort(face_cell_left,CompType(face_cell_left.dimension_0()/2,minmax.min_val,minmax.max_val),true); + Kokkos::BinSort bin_sort(face_cell_left,CompType(face_cell_left.extent(0)/2,minmax.min_val,minmax.max_val),true); bin_sort.create_permute_vector(); Kokkos::deep_copy(device_faces.permute_vector_, bin_sort.sort_order); } diff --git a/kokkos/GreenGauss.h b/kokkos/GreenGauss.h index db4c47e..5229585 100644 --- a/kokkos/GreenGauss.h +++ b/kokkos/GreenGauss.h @@ -317,7 +317,7 @@ class GreenGauss { //Sum of all contributions. 
green_gauss_gradient_sum gradient_sum(*cells_, gradients); Kokkos::parallel_for(mesh_data_->num_owned_cells, gradient_sum); - Device::fence(); + Device().fence(); } //communicate the computed gradient for ghost cells. @@ -325,16 +325,16 @@ class GreenGauss { //copy values to be send from device to host extract_shared_tensor extract_shared_gradients(gradients, mesh_data_->send_local_ids, shared_gradient_vars);//sol_np1_vec, send_local_ids, shared_cells); Kokkos::parallel_for(mesh_data_->num_ghosts,extract_shared_gradients); - Device::fence(); + Device().fence(); Kokkos::deep_copy(shared_gradient_vars_host, shared_gradient_vars); - communicate_ghosted_cell_data(mesh_data_->sendCount, mesh_data_->recvCount, shared_gradient_vars_host.ptr_on_device(),ghosted_gradient_vars_host.ptr_on_device(), 15); + communicate_ghosted_cell_data(mesh_data_->sendCount, mesh_data_->recvCount, shared_gradient_vars_host.data(),ghosted_gradient_vars_host.data(), 15); //copy values to be sent from host to device Kokkos::deep_copy(ghosted_gradient_vars, ghosted_gradient_vars_host); insert_ghost_tensor insert_ghost_gradients(gradients, mesh_data_->recv_local_ids, ghosted_gradient_vars); Kokkos::parallel_for(mesh_data_->num_ghosts, insert_ghost_gradients); - Device::fence(); + Device().fence(); } private: diff --git a/kokkos/Parallel3DMesh.h b/kokkos/Parallel3DMesh.h index 69929f0..616f7b2 100644 --- a/kokkos/Parallel3DMesh.h +++ b/kokkos/Parallel3DMesh.h @@ -436,7 +436,7 @@ class Parallel3DMesh{ Kokkos::View ghost_volumes("GhostVolumes",total_recv_count); typename Kokkos::View::HostMirror host_ghost_volumes = Kokkos::create_mirror_view(ghost_volumes); - communicate_ghosted_cell_data(mesh_data.sendCount, mesh_data.recvCount, host_shared_volumes.ptr_on_device(),host_ghost_volumes.ptr_on_device(), 1); + communicate_ghosted_cell_data(mesh_data.sendCount, mesh_data.recvCount, host_shared_volumes.data(),host_ghost_volumes.data(), 1); Kokkos::deep_copy(ghost_volumes,host_ghost_volumes); diff --git 
a/kokkos/README b/kokkos/README index 316f781..c64bc04 100644 --- a/kokkos/README +++ b/kokkos/README @@ -5,6 +5,8 @@ Sections: --------- I) Introduction II) Building +II.a) With CMake (suggested) +II.b) With Make III) Running IV) Testing @@ -32,6 +34,19 @@ MiniAero has minimal dependencies. It directly depends on the Kokkos library. You can check this out from github: git clone https://github.com:kokkos/kokkos +MiniAero can be built with CMake (suggested), or Makefiles. + +In all cases, miniAero.kokkos binary is produced. + +II.a) With CMake (suggested) + +MiniAero fetches Kokkos 4.3.00 as part of the CMake configure step. +Options may be passed to the Kokkos build as normal, +e.g. -DKokkos_ENABLE_OpenMP=ON + +MPI can be disabled with -DMINIAERO_ENABLE_MPI=OFF + +II.b) With Make MiniAero uses simple Makefiles and builds Kokkos as an integrated library (i.e. you do not need to pre-install Kokkos). diff --git a/kokkos/StencilLimiter.h b/kokkos/StencilLimiter.h index 05647d9..e409133 100644 --- a/kokkos/StencilLimiter.h +++ b/kokkos/StencilLimiter.h @@ -551,11 +551,11 @@ class StencilLimiter{ Kokkos::parallel_for(nboundary_faces, bc_min_max); } - Device::fence(); + Device().fence(); gather_min_max gather(*cells_, stored_min_, stored_max_, stencil_min_, stencil_max_); Kokkos::parallel_for(mesh_data_->num_owned_cells, gather); - Device::fence(); + Device().fence(); } void communicate_min_max(){ @@ -563,28 +563,28 @@ class StencilLimiter{ // For min extract_shared_vector extract_shared_min(stencil_min_, mesh_data_->send_local_ids, shared_vars); Kokkos::parallel_for(mesh_data_->num_ghosts, extract_shared_min); - Device::fence(); + Device().fence(); Kokkos::deep_copy(shared_vars_host, shared_vars); - communicate_ghosted_cell_data(mesh_data_->sendCount, mesh_data_->recvCount, shared_vars_host.ptr_on_device(),ghosted_vars_host.ptr_on_device(), 5); + communicate_ghosted_cell_data(mesh_data_->sendCount, mesh_data_->recvCount, shared_vars_host.data(),ghosted_vars_host.data(), 
5); Kokkos::deep_copy(ghosted_vars, ghosted_vars_host); insert_ghost_vector insert_ghost_min(stencil_min_, mesh_data_->recv_local_ids, ghosted_vars); Kokkos::parallel_for(mesh_data_->num_ghosts, insert_ghost_min); - Device::fence(); + Device().fence(); // For max extract_shared_vector extract_shared_max(stencil_max_, mesh_data_->send_local_ids, shared_vars); Kokkos::parallel_for(mesh_data_->num_ghosts, extract_shared_max); - Device::fence(); + Device().fence(); Kokkos::deep_copy(shared_vars_host, shared_vars); - communicate_ghosted_cell_data(mesh_data_->sendCount, mesh_data_->recvCount, shared_vars_host.ptr_on_device(),ghosted_vars_host.ptr_on_device(), 5); + communicate_ghosted_cell_data(mesh_data_->sendCount, mesh_data_->recvCount, shared_vars_host.data(),ghosted_vars_host.data(), 5); Kokkos::deep_copy(ghosted_vars, ghosted_vars_host); insert_ghost_vector insert_ghost_max(stencil_max_, mesh_data_->recv_local_ids, ghosted_vars); Kokkos::parallel_for(mesh_data_->num_ghosts, insert_ghost_max); - Device::fence(); + Device().fence(); // TODO: Maybe combined or overlapped in future. 
} @@ -608,26 +608,26 @@ class StencilLimiter{ limiter_face limiter_bc(*faces, sol_np1_vec, *cells_, gradients, stencil_min_, stencil_max_, stored_limiter_); Kokkos::parallel_for(nboundary_faces, limiter_bc); } - Device::fence(); + Device().fence(); gather_limiter gather(cells_->nfaces_, stored_limiter_, limiter); Kokkos::parallel_for(mesh_data_->num_owned_cells, gather); - Device::fence(); + Device().fence(); } void communicate_limiter(solution_field_type limiter) { extract_shared_vector extract_shared_limiter(limiter, mesh_data_->send_local_ids, shared_vars); Kokkos::parallel_for(mesh_data_->num_ghosts, extract_shared_limiter); - Device::fence(); + Device().fence(); Kokkos::deep_copy(shared_vars_host, shared_vars); - communicate_ghosted_cell_data(mesh_data_->sendCount, mesh_data_->recvCount, shared_vars_host.ptr_on_device(), ghosted_vars_host.ptr_on_device(), 5); + communicate_ghosted_cell_data(mesh_data_->sendCount, mesh_data_->recvCount, shared_vars_host.data(), ghosted_vars_host.data(), 5); Kokkos::deep_copy(ghosted_vars, ghosted_vars_host); insert_ghost_vector insert_ghost_limiter(limiter, mesh_data_->recv_local_ids, ghosted_vars); Kokkos::parallel_for(mesh_data_->num_ghosts, insert_ghost_limiter); - Device::fence(); + Device().fence(); } private: diff --git a/kokkos/TimeSolverExplicitRK4.h b/kokkos/TimeSolverExplicitRK4.h index 3f61dca..acd6c2f 100644 --- a/kokkos/TimeSolverExplicitRK4.h +++ b/kokkos/TimeSolverExplicitRK4.h @@ -47,7 +47,7 @@ // TPL header files #include -#include +#include #include "MemoryUsage.h" @@ -206,7 +206,7 @@ TimeSolverExplicitRK4::~TimeSolverExplicitRK4() template void TimeSolverExplicitRK4::Solve() { - Kokkos::Impl::Timer timer; + Kokkos::Timer timer; //Kokkos Arrays typedef typename ViewTypes::scalar_field_type scalar_field_type; typedef typename ViewTypes::solution_field_type solution_field_type; @@ -335,7 +335,7 @@ void TimeSolverExplicitRK4::Solve() copy copy_solution( sol_n_vec, sol_np1_vec); Kokkos::parallel_for(nowned_cells, 
copy_solution); - Device::fence(); + Device().fence(); for (ts_data_.time_it = 1; ts_data_.time_it <= ts_data_.max_its; ++ts_data_.time_it) { @@ -354,7 +354,7 @@ void TimeSolverExplicitRK4::Solve() //Update temporary solution used to evaluate the residual for this RK stage update update_rk_stage(alpha_[irk], res_vec, sol_n_vec, sol_temp_vec); Kokkos::parallel_for(nowned_cells, update_rk_stage); - Device::fence(); + Device().fence(); #ifdef WITH_MPI // Update ghosted values (using sol_temp_vec since it is used for all residual calculations.) @@ -362,22 +362,22 @@ void TimeSolverExplicitRK4::Solve() //copy values to be send from device to host extract_shared_vector extract_shared_values(sol_temp_vec, send_local_ids, shared_conserved_vars); Kokkos::parallel_for(num_ghosts,extract_shared_values); - Device::fence(); + Device().fence(); Kokkos::deep_copy(shared_conserved_vars_host, shared_conserved_vars); - communicate_ghosted_cell_data(sendCount, recvCount, shared_conserved_vars_host.ptr_on_device(),ghosted_conserved_vars_host.ptr_on_device(), 5); + communicate_ghosted_cell_data(sendCount, recvCount, shared_conserved_vars_host.data(),ghosted_conserved_vars_host.data(), 5); //copy values to be sent from host to device Kokkos::deep_copy(ghosted_conserved_vars, ghosted_conserved_vars_host); insert_ghost_vector insert_ghost_values(sol_temp_vec, recv_local_ids, ghosted_conserved_vars); Kokkos::parallel_for(num_ghosts, insert_ghost_values); - Device::fence(); + Device().fence(); #endif //Zero fluxes zero_cell_flux zero_flux(cells); Kokkos::parallel_for(nowned_cells, zero_flux); - Device::fence(); + Device().fence(); //Compute Gradients and Limiters if(options_.second_order_space || options_.viscous){ @@ -410,7 +410,7 @@ void TimeSolverExplicitRK4::Solve() compute_face_flux, newtonian_viscous_flux > fluxop(internal_faces, sol_temp_vec, gradients, limiters, cells, inviscid_flux_evaluator, viscous_flux_evaluator); Kokkos::parallel_for(ninternal_faces,fluxop); } - 
Device::fence(); + Device().fence(); } else{ no_viscous_flux viscous_flux_evaluator; @@ -422,7 +422,7 @@ void TimeSolverExplicitRK4::Solve() compute_face_flux, no_viscous_flux > fluxop(internal_faces, sol_temp_vec, gradients, limiters, cells, inviscid_flux_evaluator, viscous_flux_evaluator); Kokkos::parallel_for(ninternal_faces,fluxop); } - Device::fence(); + Device().fence(); } //Extrapolated BC fluxes @@ -435,7 +435,7 @@ void TimeSolverExplicitRK4::Solve() compute_extrapolateBC_flux > boundary_fluxop(bc_faces, sol_temp_vec, cells, inviscid_flux_evaluator); Kokkos::parallel_for(nboundary_faces,boundary_fluxop); } - Device::fence(); + Device().fence(); //Tangent BC fluxes typename std::vector >::iterator tf_iter, tf_iter_end; @@ -447,7 +447,7 @@ void TimeSolverExplicitRK4::Solve() compute_tangentBC_flux > boundary_fluxop(bc_faces, sol_temp_vec, cells, inviscid_flux_evaluator); Kokkos::parallel_for(nboundary_faces,boundary_fluxop); } - Device::fence(); + Device().fence(); //Noslip BC fluxes typename std::vector >::iterator if_iter, if_iter_end; @@ -460,7 +460,7 @@ void TimeSolverExplicitRK4::Solve() compute_NoSlipBC_flux, newtonian_viscous_flux > boundary_fluxop(bc_faces, sol_temp_vec, cells, inviscid_flux_evaluator, viscous_flux_evaluator); Kokkos::parallel_for(nboundary_faces,boundary_fluxop); } - Device::fence(); + Device().fence(); //Inflow BC fluxes typename std::vector >::iterator nsf_iter, nsf_iter_end; @@ -472,17 +472,17 @@ void TimeSolverExplicitRK4::Solve() compute_inflowBC_flux > boundary_fluxop(bc_faces, sol_temp_vec, cells, &inflow_state[0], inviscid_flux_evaluator); Kokkos::parallel_for(nboundary_faces,boundary_fluxop); } - Device::fence(); + Device().fence(); //Sum up all of the contributions apply_cell_flux flux_residual(cells, res_vec, ts_data_.dt); Kokkos::parallel_for(nowned_cells, flux_residual); - Device::fence(); + Device().fence(); //Update np1 solution with each stages contribution update 
update_fields(beta_[irk],res_vec,sol_np1_vec,sol_np1_vec); Kokkos::parallel_for(nowned_cells, update_fields); - Device::fence(); + Device().fence(); } // Update the solution vector after having run all of the RK stages. copy copy_solution( sol_np1_vec, sol_n_vec); @@ -490,7 +490,7 @@ void TimeSolverExplicitRK4::Solve() } - Device::fence(); + Device().fence(); if(my_id_==0){ fprintf(stdout,"\n ... Device Run time: %8.2f seconds ...\n", timer.seconds()); } diff --git a/kokkos/tests/run_tests.sh b/kokkos/tests/run_tests.sh index b2a032b..ca6ae9a 100755 --- a/kokkos/tests/run_tests.sh +++ b/kokkos/tests/run_tests.sh @@ -3,6 +3,7 @@ green='\e[0;32m' red='\e[0;31m' NC='\e[0m' +code=0 EXE=`echo "$(cd "$(dirname "$1")"; pwd)/$(basename "$1")"` DO_MPI=`echo $2 | grep MPI | wc -l` @@ -12,6 +13,7 @@ cd 3D_Sod_Serial if [ "$?" -ne 0 ]; then echo -e "${red}3D_Sod_Serial Test Failed${NC}" + code=$(($code + 1)) else echo -e "${green} 3D_Sod_Serial Test Passed${NC}" fi @@ -24,6 +26,7 @@ cd 3D_Sod_Parallel if [ "$?" -ne 0 ]; then echo -e "${red}3D_Sod_Parallel Test Failed${NC}" + code=$(($code + 1)) else echo -e "${green} 3D_Sod_Parallel Test Passed${NC}" fi @@ -35,6 +38,7 @@ cd Ramp_Serial if [ "$?" -ne 0 ]; then echo -e "${red}Ramp_Serial Test Failed${NC}" + code=$(($code + 1)) else echo -e "${green} Ramp_Serial Test Passed${NC}" fi @@ -47,6 +51,7 @@ cd Ramp_Parallel if [ "$?" -ne 0 ]; then echo -e "${red}Ramp_Parallel Test Failed${NC}" + code=$(($code + 1)) else echo -e "${green} Ramp_Parallel Test Passed${NC}" fi @@ -58,6 +63,7 @@ cd FlatPlate_Serial if [ "$?" -ne 0 ]; then echo -e "${red}FlatPlate_Serial Test Failed${NC}" + code=$(($code + 1)) else echo -e "${green} FlatPlate_Serial Test Passed${NC}" fi @@ -70,8 +76,11 @@ cd FlatPlate_Parallel if [ "$?" -ne 0 ]; then echo -e "${red}FlatPlate_Parallel Test Failed${NC}" + code=$(($code + 1)) else echo -e "${green} FlatPlate_Parallel Test Passed${NC}" fi cd .. 
fi + +exit $code \ No newline at end of file diff --git a/kokkos/tests/tools/numeric_text_diff b/kokkos/tests/tools/numeric_text_diff index a52c0cf..73ac8e6 100755 --- a/kokkos/tests/tools/numeric_text_diff +++ b/kokkos/tests/tools/numeric_text_diff @@ -1,8 +1,8 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 from optparse import OptionParser import os -import popen2 +from subprocess import Popen, PIPE import sys # Simple wrapper for when failure discovered @@ -42,7 +42,8 @@ for filename in args: fail("%s does not exist or is not a normal file!" % filename) # Diff the two files -diff_output = os.popen("diff -u %s %s" % (file1, file2), "r") +# Run diff with an argument list (no shell) so file names containing spaces or shell metacharacters are passed through verbatim +diff_output = Popen(["diff", "-u", file1, file2], stdout=PIPE, encoding='utf-8').stdout before = [] after = [] diff_output.readline() # Skip info about file1 @@ -58,7 +59,7 @@ if len(before) != len(after): # Loop over the lines that changed between the two files differences = '' -for i in xrange(len(before)): +for i in range(len(before)): before_fields = before[i].split() after_fields = after[i].split() if len(before_fields) != len(after_fields): @@ -66,7 +67,7 @@ for i in xrange(len(before)): ' %s %s' % (before[i], after[i])) # Loop over the fields present in the given line - for j in xrange(len(before_fields)): + for j in range(len(before_fields)): before_val = before_fields[j] after_val = after_fields[j]