From e1ec9ad4d31d7b3af9fc150dd510ee471d5dba5f Mon Sep 17 00:00:00 2001 From: Anand Radhakrishnan Date: Fri, 23 Jan 2026 09:52:59 -0500 Subject: [PATCH 01/21] LLVMFlang workarounds --- CMakeLists.txt | 44 +- load_amd.sh | 39 +- src/common/include/shared_parallel_macros.fpp | 10 + src/common/m_boundary_common.fpp | 362 ++++++----- src/common/m_chemistry.fpp | 415 +++++++------ src/post_process/m_global_parameters.fpp | 2 + src/pre_process/m_global_parameters.fpp | 2 + src/simulation/include/inline_capillary.fpp | 9 +- src/simulation/m_cbc.fpp | 22 +- src/simulation/m_data_output.fpp | 16 +- src/simulation/m_global_parameters.fpp | 2 + src/simulation/m_hyperelastic.fpp | 2 +- src/simulation/m_igr.fpp | 6 +- src/simulation/m_muscl.fpp | 7 +- src/simulation/m_rhs.fpp | 97 +-- src/simulation/m_riemann_solvers.fpp | 415 +++++++------ src/simulation/m_sim_helpers.fpp | 10 +- src/simulation/m_start_up.fpp | 9 +- src/simulation/m_surface_tension.fpp | 77 +-- src/simulation/m_time_steppers.fpp | 7 +- src/simulation/m_viscous.fpp | 587 +++++++++--------- src/simulation/m_weno.fpp | 101 +-- toolchain/mfc/case.py | 4 +- 23 files changed, 1223 insertions(+), 1022 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 751a690b55..0507754dce 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -310,7 +310,18 @@ endif() # and generate documentation. Instead, we can simply include the list of .f90 # files that will eventually be used to compile . -macro(HANDLE_SOURCES target useCommon) +macro(HANDLE_SOURCES target useCommon useOpenACC useOpenMP) + + if (${useOpenACC} AND ${useOpenMP}) + message(FATAL_ERROR "OpenACC and OpenMP at same time is unsupported.") + elseif (${useOpenACC}) + set(MFC_GPU_MODE "OpenACC") + elseif (${useOpenMP}) + set(MFC_GPU_MODE "OpenMP") + else() + set(MFC_GPU_MODE "") + endif() + set(${target}_DIR "${CMAKE_SOURCE_DIR}/src/${target}") set(common_DIR "${CMAKE_SOURCE_DIR}/src/common") @@ -371,7 +382,8 @@ macro(HANDLE_SOURCES target useCommon) -D MFC_${CMAKE_Fortran_COMPILER_ID} -D MFC_${${target}_UPPER} -D MFC_COMPILER="${CMAKE_Fortran_COMPILER_ID}" - -D MFC_CASE_OPTIMIZATION=False + -D MFC_GPU_MODE="${MFC_GPU_MODE}" + -D MFC_CASE_OPTIMIZATION=False -D chemistry=False --line-numbering --no-folding @@ -388,11 +400,10 @@ macro(HANDLE_SOURCES target useCommon) endmacro() -HANDLE_SOURCES(pre_process ON) -HANDLE_SOURCES(simulation ON) -HANDLE_SOURCES(post_process ON) -HANDLE_SOURCES(syscheck OFF) - +HANDLE_SOURCES(pre_process ON OFF OFF) +HANDLE_SOURCES(simulation ON ${MFC_OpenACC} ${MFC_OpenMP}) +HANDLE_SOURCES(post_process ON OFF OFF) +HANDLE_SOURCES(syscheck OFF ${MFC_OpenACC} ${MFC_OpenMP}) # MFC_SETUP_TARGET: Given a target (herein ), this macro creates a new # executable with the appropriate sources, compiler definitions, and @@ -430,8 +441,9 @@ function(MFC_SETUP_TARGET) foreach (a_target ${IPO_TARGETS}) set_target_properties(${a_target} PROPERTIES Fortran_PREPROCESS ON) + message(STATUS ${CMAKE_Fortran_COMPILER_ID}) - target_include_directories(${a_target} PRIVATE + target_include_directories(${a_target} PRIVATE "${CMAKE_SOURCE_DIR}/src/common" "${CMAKE_SOURCE_DIR}/src/common/include" "${CMAKE_SOURCE_DIR}/src/${ARGS_TARGET}") @@ -449,9 +461,9 @@ function(MFC_SETUP_TARGET) if (MFC_MPI AND ARGS_MPI) find_package(MPI COMPONENTS Fortran REQUIRED) - - target_compile_definitions(${a_target} PRIVATE MFC_MPI) - target_link_libraries (${a_target} PRIVATE MPI::MPI_Fortran) + + target_compile_definitions(${a_target} PRIVATE $ENV{CRAY_MPICH_INC}) + target_link_libraries (${a_target} PRIVATE $ENV{CRAY_MPICH_LIB}) endif() if (ARGS_SILO) @@ -470,9 +482,9 @@ function(MFC_SETUP_TARGET) find_package(CUDAToolkit REQUIRED) target_link_libraries(${a_target} PRIVATE CUDA::cudart CUDA::cufft) else() - find_package(hipfort COMPONENTS hipfft CONFIG REQUIRED) - target_link_libraries(${a_target} PRIVATE hipfort::hipfft) - endif() + #find_package(hipfort COMPONENTS hipfft CONFIG REQUIRED) + target_link_libraries(${a_target} PRIVATE $ENV{CRAY_HIPFORT_INC}) + endif() else() find_package(FFTW REQUIRED) target_link_libraries(${a_target} PRIVATE FFTW::FFTW) @@ -517,8 +529,8 @@ function(MFC_SETUP_TARGET) target_compile_options(${a_target} PRIVATE -fopenmp) target_link_options(${a_target} PRIVATE -fopenmp) elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "LLVMFlang") - target_compile_options(${a_target} PRIVATE -fopenmp --offload-arch=gfx90a) - target_link_options(${a_target} PRIVATE -fopenmp --offload-arch=gfx90a) + target_compile_options(${a_target} PRIVATE -fopenmp --offload-arch=gfx90a $ENV{CRAY_MPICH_INC}) + target_link_options(${a_target} PRIVATE -fopenmp --offload-arch=gfx90a $ENV{CRAY_MPICH_LIB}) endif() endif() diff --git a/load_amd.sh b/load_amd.sh index 550da1687d..7137c18fc4 100644 --- a/load_amd.sh +++ b/load_amd.sh @@ -1,7 +1,32 @@ -module use /ccs/home/bcornille/afar-drops/modulefiles/Core/ -module load rocm-afar-drop mpich cray-python -module load cmake -#export OMPX_APU_MAPS=0 -#export HSA_XNACK=0 -#export LIBOMPTARGET_INFO=0 -#export AMD_LOG_LEVEL=1 +module load python cmake +module load cpe/25.09 +module load PrgEnv-amd + +AFAR_UMS_BASEDIR="/sw/crusher/ums/compilers/afar" +AFAR_UMS_LATEST=$(ls -d --color=never ${AFAR_UMS_BASEDIR}/*/ | tail -n1) +export OLCF_AFAR_ROOT=${AFAR_UMS_BASEDIR}/"rocm-afar-8873-drop-22.2.0" + +export PATH=${OLCF_AFAR_ROOT}/lib/llvm/bin:${PATH} +export LD_LIBRARY_PATH=${OLCF_AFAR_ROOT}/lib:${OLCF_AFAR_ROOT}/lib/llvm/lib:${LD_LIBRARY_PATH} + +export CRAY_MPICH_INC="-I${OLCF_AFAR_ROOT}/include/mpich3.4a2" +export CRAY_HIPFORT_INC="-I${OLCF_AFAR_ROOT}/include/hipfort/amdgcn" +export CRAY_HIP_INC="-I${OLCF_AFAR_ROOT}/include/hip" +export CRAY_MPICH_LIB="-L${CRAY_MPICH_PREFIX}/lib \ + ${CRAY_PMI_POST_LINK_OPTS} \ + -lmpifort_amd -lmpi_amd -lmpi -lpmi -lpmi2" +export LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:${CRAY_LD_LIBRARY_PATH}" +export CMAKE_PREFIX_PATH="${OLCF_AFAR_ROOT}:${CMAKE_PREFIX_PATH}" +export FC="${OLCF_AFAR_ROOT}/bin/amdflang" + +unset MPICH_GPU_SUPPORT_ENABLED + +# module use /ccs/home/bcornille/afar-drops/modulefiles/Core/ +# module load rocm-afar-drop mpich cray-python +# module load cmake + +export OMPX_APU_MAPS=0 +export HSA_XNACK=0 +export LIBOMPTARGET_INFO=0 +export AMD_LOG_LEVEL=0 +export OFFLOAD_TRACK_ALLOCATION_TRACES=false diff --git a/src/common/include/shared_parallel_macros.fpp b/src/common/include/shared_parallel_macros.fpp index a3a0b6f753..36bee0a23a 100644 --- a/src/common/include/shared_parallel_macros.fpp +++ b/src/common/include/shared_parallel_macros.fpp @@ -1,3 +1,13 @@ +#:set NVIDIA_COMPILER_ID="NVHPC" +#:set PGI_COMPILER_ID="PGI" +#:set INTEL_COMPILER_ID="Intel" +#:set CCE_COMPILER_ID="Cray" +#:set AMD_COMPILER_ID="LLVMFlang" + +#:set USING_NVHPC = (MFC_COMPILER == NVIDIA_COMPILER_ID or MFC_COMPILER == PGI_COMPILER_ID) +#:set USING_CCE = (MFC_COMPILER == CCE_COMPILER_ID) +#:set USING_AMD = (MFC_COMPILER == AMD_COMPILER_ID) + #:def ASSERT_LIST(data, datatype) #:assert data is not None #:assert isinstance(data, list) diff --git a/src/common/m_boundary_common.fpp b/src/common/m_boundary_common.fpp index 833bcbb908..4fccf3aa68 100644 --- a/src/common/m_boundary_common.fpp +++ b/src/common/m_boundary_common.fpp @@ -152,68 +152,74 @@ contains if (n == 0) return - if (bc_y%beg >= 0) then - call s_mpi_sendrecv_variables_buffers(q_prim_vf, 2, -1, sys_size, pb_in, mv_in) - else - $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) - do l = 0, p - do k = -buff_size, m + buff_size - select case (int(bc_type(2, 1)%sf(k, 0, l))) - case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP) - call s_ghost_cell_extrapolation(q_prim_vf, 2, -1, k, l) - case (BC_AXIS) - call s_axis(q_prim_vf, pb_in, mv_in, k, l) - case (BC_REFLECTIVE) - call s_symmetry(q_prim_vf, 2, -1, k, l, pb_in, mv_in) - case (BC_PERIODIC) - call s_periodic(q_prim_vf, 2, -1, k, l, pb_in, mv_in) - case (BC_SLIP_WALL) - call s_slip_wall(q_prim_vf, 2, -1, k, l) - case (BC_NO_SLIP_WALL) - call s_no_slip_wall(q_prim_vf, 2, -1, k, l) - case (BC_DIRICHLET) - call s_dirichlet(q_prim_vf, 2, -1, k, l) - end select + #:if not MFC_CASE_OPTIMIZATION or num_dims > 1 + + if (bc_y%beg >= 0) then + call s_mpi_sendrecv_variables_buffers(q_prim_vf, 2, -1, sys_size, pb_in, mv_in) + else + $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) + do l = 0, p + do k = -buff_size, m + buff_size + select case (int(bc_type(2, 1)%sf(k, 0, l))) + case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP) + call s_ghost_cell_extrapolation(q_prim_vf, 2, -1, k, l) + case (BC_AXIS) + call s_axis(q_prim_vf, pb_in, mv_in, k, l) + case (BC_REFLECTIVE) + call s_symmetry(q_prim_vf, 2, -1, k, l, pb_in, mv_in) + case (BC_PERIODIC) + call s_periodic(q_prim_vf, 2, -1, k, l, pb_in, mv_in) + case (BC_SLIP_WALL) + call s_slip_wall(q_prim_vf, 2, -1, k, l) + case (BC_NO_SLIP_WALL) + call s_no_slip_wall(q_prim_vf, 2, -1, k, l) + case (BC_DIRICHLET) + call s_dirichlet(q_prim_vf, 2, -1, k, l) + end select - if (qbmm .and. (.not. polytropic) .and. & - (bc_type(2, 1)%sf(k, 0, l) <= BC_GHOST_EXTRAP) .and. & - (bc_type(2, 1)%sf(k, 0, l) /= BC_AXIS)) then - call s_qbmm_extrapolation(2, -1, k, l, pb_in, mv_in) - end if + if (qbmm .and. (.not. polytropic) .and. & + (bc_type(2, 1)%sf(k, 0, l) <= BC_GHOST_EXTRAP) .and. & + (bc_type(2, 1)%sf(k, 0, l) /= BC_AXIS)) then + call s_qbmm_extrapolation(2, -1, k, l, pb_in, mv_in) + end if + end do end do - end do - $:END_GPU_PARALLEL_LOOP() - end if + $:END_GPU_PARALLEL_LOOP() + end if - if (bc_y%end >= 0) then - call s_mpi_sendrecv_variables_buffers(q_prim_vf, 2, 1, sys_size, pb_in, mv_in) - else - $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) - do l = 0, p - do k = -buff_size, m + buff_size - select case (int(bc_type(2, 2)%sf(k, 0, l))) - case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP) - call s_ghost_cell_extrapolation(q_prim_vf, 2, 1, k, l) - case (BC_REFLECTIVE) - call s_symmetry(q_prim_vf, 2, 1, k, l, pb_in, mv_in) - case (BC_PERIODIC) - call s_periodic(q_prim_vf, 2, 1, k, l, pb_in, mv_in) - case (BC_SLIP_WALL) - call s_slip_wall(q_prim_vf, 2, 1, k, l) - case (BC_NO_SLIP_WALL) - call s_no_slip_wall(q_prim_vf, 2, 1, k, l) - case (BC_DIRICHLET) - call s_dirichlet(q_prim_vf, 2, 1, k, l) - end select + - if (qbmm .and. (.not. polytropic) .and. & - (bc_type(2, 2)%sf(k, 0, l) <= BC_GHOST_EXTRAP)) then - call s_qbmm_extrapolation(2, 1, k, l, pb_in, mv_in) - end if + if (bc_y%end >= 0) then + call s_mpi_sendrecv_variables_buffers(q_prim_vf, 2, 1, sys_size, pb_in, mv_in) + else + $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) + do l = 0, p + do k = -buff_size, m + buff_size + select case (int(bc_type(2, 2)%sf(k, 0, l))) + case (BC_CHAR_SUP_OUTFLOW:BC_GHOST_EXTRAP) + call s_ghost_cell_extrapolation(q_prim_vf, 2, 1, k, l) + case (BC_REFLECTIVE) + call s_symmetry(q_prim_vf, 2, 1, k, l, pb_in, mv_in) + case (BC_PERIODIC) + call s_periodic(q_prim_vf, 2, 1, k, l, pb_in, mv_in) + case (BC_SLIP_WALL) + call s_slip_wall(q_prim_vf, 2, 1, k, l) + case (BC_NO_SLIP_WALL) + call s_no_slip_wall(q_prim_vf, 2, 1, k, l) + case (BC_DIRICHLET) + call s_dirichlet(q_prim_vf, 2, 1, k, l) + end select + + if (qbmm .and. (.not. polytropic) .and. & + (bc_type(2, 2)%sf(k, 0, l) <= BC_GHOST_EXTRAP)) then + call s_qbmm_extrapolation(2, 1, k, l, pb_in, mv_in) + end if + end do end do - end do - $:END_GPU_PARALLEL_LOOP() - end if + $:END_GPU_PARALLEL_LOOP() + end if + + #:endif ! Population of Buffers in z-direction @@ -1208,44 +1214,48 @@ contains if (n == 0) return - !< y-direction - if (bc_y%beg >= 0) then - call s_mpi_sendrecv_variables_buffers(c_divs, 2, -1, num_dims + 1) - else - $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) - do l = 0, p - do k = -buff_size, m + buff_size - select case (bc_type(2, 1)%sf(k, 0, l)) - case (BC_PERIODIC) - call s_color_function_periodic(c_divs, 2, -1, k, l) - case (BC_REFLECTIVE) - call s_color_function_reflective(c_divs, 2, -1, k, l) - case default - call s_color_function_ghost_cell_extrapolation(c_divs, 2, -1, k, l) - end select + #:if not MFC_CASE_OPTIMIZATION or num_dims > 1 + + !< y-direction + if (bc_y%beg >= 0) then + call s_mpi_sendrecv_variables_buffers(c_divs, 2, -1, num_dims + 1) + else + $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) + do l = 0, p + do k = -buff_size, m + buff_size + select case (bc_type(2, 1)%sf(k, 0, l)) + case (BC_PERIODIC) + call s_color_function_periodic(c_divs, 2, -1, k, l) + case (BC_REFLECTIVE) + call s_color_function_reflective(c_divs, 2, -1, k, l) + case default + call s_color_function_ghost_cell_extrapolation(c_divs, 2, -1, k, l) + end select + end do end do - end do - $:END_GPU_PARALLEL_LOOP() - end if + $:END_GPU_PARALLEL_LOOP() + end if - if (bc_y%end >= 0) then - call s_mpi_sendrecv_variables_buffers(c_divs, 2, 1, num_dims + 1) - else - $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) - do l = 0, p - do k = -buff_size, m + buff_size - select case (bc_type(2, 2)%sf(k, 0, l)) - case (BC_PERIODIC) - call s_color_function_periodic(c_divs, 2, 1, k, l) - case (BC_REFLECTIVE) - call s_color_function_reflective(c_divs, 2, 1, k, l) - case default - call s_color_function_ghost_cell_extrapolation(c_divs, 2, 1, k, l) - end select + if (bc_y%end >= 0) then + call s_mpi_sendrecv_variables_buffers(c_divs, 2, 1, num_dims + 1) + else + $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) + do l = 0, p + do k = -buff_size, m + buff_size + select case (bc_type(2, 2)%sf(k, 0, l)) + case (BC_PERIODIC) + call s_color_function_periodic(c_divs, 2, 1, k, l) + case (BC_REFLECTIVE) + call s_color_function_reflective(c_divs, 2, 1, k, l) + case default + call s_color_function_ghost_cell_extrapolation(c_divs, 2, 1, k, l) + end select + end do end do - end do - $:END_GPU_PARALLEL_LOOP() - end if + $:END_GPU_PARALLEL_LOOP() + end if + + #:endif if (p == 0) return @@ -1539,58 +1549,62 @@ contains end if - if (n == 0) then - return - else if (bc_y%beg >= 0) then - call s_mpi_sendrecv_variables_buffers(jac_sf, 2, -1, 1) - else - $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) - do l = 0, p - do k = idwbuff(1)%beg, idwbuff(1)%end - select case (bc_type(2, 1)%sf(k, 0, l)) - case (BC_PERIODIC) - do j = 1, buff_size - jac_sf(1)%sf(k, -j, l) = jac_sf(1)%sf(k, n - j + 1, l) - end do - case (BC_REFLECTIVE) - do j = 1, buff_size - jac_sf(1)%sf(k, -j, l) = jac_sf(1)%sf(k, j - 1, l) - end do - case default - do j = 1, buff_size - jac_sf(1)%sf(k, -j, l) = jac_sf(1)%sf(k, 0, l) - end do - end select + #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 + + if (n == 0) then + return + else if (bc_y%beg >= 0) then + call s_mpi_sendrecv_variables_buffers(jac_sf, 2, -1, 1) + else + $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) + do l = 0, p + do k = idwbuff(1)%beg, idwbuff(1)%end + select case (bc_type(2, 1)%sf(k, 0, l)) + case (BC_PERIODIC) + do j = 1, buff_size + jac_sf(1)%sf(k, -j, l) = jac_sf(1)%sf(k, n - j + 1, l) + end do + case (BC_REFLECTIVE) + do j = 1, buff_size + jac_sf(1)%sf(k, -j, l) = jac_sf(1)%sf(k, j - 1, l) + end do + case default + do j = 1, buff_size + jac_sf(1)%sf(k, -j, l) = jac_sf(1)%sf(k, 0, l) + end do + end select + end do end do - end do - $:END_GPU_PARALLEL_LOOP() + $:END_GPU_PARALLEL_LOOP() - end if + end if - if (bc_y%end >= 0) then - call s_mpi_sendrecv_variables_buffers(jac_sf, 2, 1, 1) - else - $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) - do l = 0, p - do k = idwbuff(1)%beg, idwbuff(1)%end - select case (bc_type(2, 2)%sf(k, 0, l)) - case (BC_PERIODIC) - do j = 1, buff_size - jac_sf(1)%sf(k, n + j, l) = jac_sf(1)%sf(k, j - 1, l) - end do - case (BC_REFLECTIVE) - do j = 1, buff_size - jac_sf(1)%sf(k, n + j, l) = jac_sf(1)%sf(k, n - (j - 1), l) - end do - case default - do j = 1, buff_size - jac_sf(1)%sf(k, n + j, l) = jac_sf(1)%sf(k, n, l) - end do - end select + if (bc_y%end >= 0) then + call s_mpi_sendrecv_variables_buffers(jac_sf, 2, 1, 1) + else + $:GPU_PARALLEL_LOOP(private='[l,k]', collapse=2) + do l = 0, p + do k = idwbuff(1)%beg, idwbuff(1)%end + select case (bc_type(2, 2)%sf(k, 0, l)) + case (BC_PERIODIC) + do j = 1, buff_size + jac_sf(1)%sf(k, n + j, l) = jac_sf(1)%sf(k, j - 1, l) + end do + case (BC_REFLECTIVE) + do j = 1, buff_size + jac_sf(1)%sf(k, n + j, l) = jac_sf(1)%sf(k, n - (j - 1), l) + end do + case default + do j = 1, buff_size + jac_sf(1)%sf(k, n + j, l) = jac_sf(1)%sf(k, n, l) + end do + end select + end do end do - end do - $:END_GPU_PARALLEL_LOOP() - end if + $:END_GPU_PARALLEL_LOOP() + end if + + #:endif #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 if (p == 0) then @@ -1913,27 +1927,35 @@ contains end do end do - if (n > 0) then - do k = 0, p - do j = 1, sys_size - do i = 0, m - bc_buffers(2, 1)%sf(i, j, k) = q_prim_vf(j)%sf(i, 0, k) - bc_buffers(2, 2)%sf(i, j, k) = q_prim_vf(j)%sf(i, n, k) - end do - end do - end do + #:if not MFC_CASE_OPTIMIZATION or num_dims > 1 - if (p > 0) then - do k = 1, sys_size - do j = 0, n + if (n > 0) then + do k = 0, p + do j = 1, sys_size do i = 0, m - bc_buffers(3, 1)%sf(i, j, k) = q_prim_vf(k)%sf(i, j, 0) - bc_buffers(3, 2)%sf(i, j, k) = q_prim_vf(k)%sf(i, j, p) + bc_buffers(2, 1)%sf(i, j, k) = q_prim_vf(j)%sf(i, 0, k) + bc_buffers(2, 2)%sf(i, j, k) = q_prim_vf(j)%sf(i, n, k) end do end do end do + + #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 + + if (p > 0) then + do k = 1, sys_size + do j = 0, n + do i = 0, m + bc_buffers(3, 1)%sf(i, j, k) = q_prim_vf(k)%sf(i, j, 0) + bc_buffers(3, 2)%sf(i, j, k) = q_prim_vf(k)%sf(i, j, p) + end do + end do + end do + end if + + #:endif end if - end if + + #:endif end subroutine s_pack_boundary_condition_buffers @@ -1945,18 +1967,20 @@ contains bc_type(1, 2)%sf(:, :, :) = int(min(bc_x%end, 0), kind=1) $:GPU_UPDATE(device='[bc_type(1,1)%sf,bc_type(1,2)%sf]') - if (n > 0) then - bc_type(2, 1)%sf(:, :, :) = int(min(bc_y%beg, 0), kind=1) - bc_type(2, 2)%sf(:, :, :) = int(min(bc_y%end, 0), kind=1) - $:GPU_UPDATE(device='[bc_type(2,1)%sf,bc_type(2,2)%sf]') - #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 - if (p > 0) then - bc_type(3, 1)%sf(:, :, :) = int(min(bc_z%beg, 0), kind=1) - bc_type(3, 2)%sf(:, :, :) = int(min(bc_z%end, 0), kind=1) - $:GPU_UPDATE(device='[bc_type(3,1)%sf,bc_type(3,2)%sf]') - end if - #:endif - end if + #:if not MFC_CASE_OPTIMIZATION or num_dims > 1 + if (n > 0) then + bc_type(2, 1)%sf(:, :, :) = int(min(bc_y%beg, 0), kind=1) + bc_type(2, 2)%sf(:, :, :) = int(min(bc_y%end, 0), kind=1) + $:GPU_UPDATE(device='[bc_type(2,1)%sf,bc_type(2,2)%sf]') + #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 + if (p > 0) then + bc_type(3, 1)%sf(:, :, :) = int(min(bc_z%beg, 0), kind=1) + bc_type(3, 2)%sf(:, :, :) = int(min(bc_z%end, 0), kind=1) + $:GPU_UPDATE(device='[bc_type(3,1)%sf,bc_type(3,2)%sf]') + end if + #:endif + end if + #:endif end subroutine s_assign_default_bc_type @@ -2155,14 +2179,18 @@ contains if (bc_io) then deallocate (bc_buffers(1, 1)%sf) deallocate (bc_buffers(1, 2)%sf) + #:if not MFC_CASE_OPTIMIZATION or num_dims > 1 if (n > 0) then deallocate (bc_buffers(2, 1)%sf) deallocate (bc_buffers(2, 2)%sf) + #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 if (p > 0) then deallocate (bc_buffers(3, 1)%sf) deallocate (bc_buffers(3, 2)%sf) end if + #:endif end if + #:endif end if deallocate (bc_buffers) diff --git a/src/common/m_chemistry.fpp b/src/common/m_chemistry.fpp index 8339766813..5d3d1b7811 100644 --- a/src/common/m_chemistry.fpp +++ b/src/common/m_chemistry.fpp @@ -19,12 +19,12 @@ module m_chemistry implicit none - #:block DEF_AMD + #:if USING_AMD real(wp) :: molecular_weights_nonparameter(10) = & (/2.016, 1.008, 15.999, 31.998, 17.007, 18.015, 33.006, & 34.014, 39.95, 28.014/) $:GPU_DECLARE(create='[molecular_weights_nonparameter]') - #:endblock DEF_AMD + #:endif type(int_bounds_info) :: isc1, isc2, isc3 $:GPU_DECLARE(create='[isc1, isc2, isc3]') @@ -129,7 +129,7 @@ contains real(wp), dimension(num_species) :: Ys real(wp), dimension(num_species) :: omega - $:GPU_PARALLEL_LOOP(collapse=3, private='[Ys, omega, eqn, T, rho, omega, omega_m]', copyin='[bounds]') + $:GPU_PARALLEL_LOOP(collapse=3, private='[Ys, omega, eqn, T, rho, omega_m]', copyin='[bounds]') do z = bounds(3)%beg, bounds(3)%end do y = bounds(2)%beg, bounds(2)%end do x = bounds(1)%beg, bounds(1)%end @@ -146,12 +146,11 @@ contains $:GPU_LOOP(parallelism='[seq]') do eqn = chemxb, chemxe - #:block UNDEF_AMD - omega_m = molecular_weights(eqn - chemxb + 1)*omega(eqn - chemxb + 1) - #:endblock UNDEF_AMD - #:block DEF_AMD + #:if USING_AMD omega_m = molecular_weights_nonparameter(eqn - chemxb + 1)*omega(eqn - chemxb + 1) - #:endblock DEF_AMD + #:else + omega_m = molecular_weights(eqn - chemxb + 1)*omega(eqn - chemxb + 1) + #:endif rhs_vf(eqn)%sf(x, y, z) = rhs_vf(eqn)%sf(x, y, z) + omega_m end do @@ -189,218 +188,224 @@ contains $:GPU_UPDATE(device='[isc1,isc2,isc3]') - if (chemistry) then + if (chemistry .or. dummy) then ! Set offsets based on direction using array indexing offsets = 0 offsets(idir) = 1 ! Model 1: Mixture-Average Transport if (chem_params%transport_model == 1) then - #:block UNDEF_AMD - ! Note: Added 'i' and 'eqn' to private list. - $:GPU_PARALLEL_LOOP(collapse=3, private='[x,y,z,i,eqn,Ys_L, Ys_R, Ys_cell, Xs_L, Xs_R, mass_diffusivities_mixavg1, mass_diffusivities_mixavg2, mass_diffusivities_mixavg_Cell, h_l, h_r, Xs_cell, h_k, dXk_dxi,Mass_Diffu_Flux, Mass_Diffu_Energy, MW_L, MW_R, MW_cell, Rgas_L, Rgas_R, T_L, T_R, P_L, P_R, rho_L, rho_R, rho_cell, rho_Vic, lambda_L, lambda_R, lambda_Cell, dT_dxi, grid_spacing]', copyin='[offsets]') - do z = isc3%beg, isc3%end - do y = isc2%beg, isc2%end - do x = isc1%beg, isc1%end - ! Calculate grid spacing using direction-based indexing - select case (idir) - case (1) - grid_spacing = x_cc(x + 1) - x_cc(x) - case (2) - grid_spacing = y_cc(y + 1) - y_cc(y) - case (3) - grid_spacing = z_cc(z + 1) - z_cc(z) - end select - - ! Extract species mass fractions - $:GPU_LOOP(parallelism='[seq]') - do i = chemxb, chemxe - Ys_L(i - chemxb + 1) = q_prim_qp(i)%sf(x, y, z) - Ys_R(i - chemxb + 1) = q_prim_qp(i)%sf(x + offsets(1), y + offsets(2), z + offsets(3)) - Ys_cell(i - chemxb + 1) = 0.5_wp*(Ys_L(i - chemxb + 1) + Ys_R(i - chemxb + 1)) - end do - - ! Calculate molecular weights and mole fractions - call get_mixture_molecular_weight(Ys_L, MW_L) - call get_mixture_molecular_weight(Ys_R, MW_R) - MW_cell = 0.5_wp*(MW_L + MW_R) - - call get_mole_fractions(MW_L, Ys_L, Xs_L) - call get_mole_fractions(MW_R, Ys_R, Xs_R) - - ! Calculate gas constants and thermodynamic properties - Rgas_L = gas_constant/MW_L - Rgas_R = gas_constant/MW_R - - P_L = q_prim_qp(E_idx)%sf(x, y, z) - P_R = q_prim_qp(E_idx)%sf(x + offsets(1), y + offsets(2), z + offsets(3)) - - rho_L = q_prim_qp(1)%sf(x, y, z) - rho_R = q_prim_qp(1)%sf(x + offsets(1), y + offsets(2), z + offsets(3)) - - T_L = P_L/rho_L/Rgas_L - T_R = P_R/rho_R/Rgas_R - - rho_cell = 0.5_wp*(rho_L + rho_R) - dT_dxi = (T_R - T_L)/grid_spacing - - ! Get transport properties - call get_species_mass_diffusivities_mixavg(P_L, T_L, Ys_L, mass_diffusivities_mixavg1) - call get_species_mass_diffusivities_mixavg(P_R, T_R, Ys_R, mass_diffusivities_mixavg2) - - call get_mixture_thermal_conductivity_mixavg(T_L, Ys_L, lambda_L) - call get_mixture_thermal_conductivity_mixavg(T_R, Ys_R, lambda_R) - - call get_species_enthalpies_rt(T_L, h_l) - call get_species_enthalpies_rt(T_R, h_r) - - ! Calculate species properties and gradients - $:GPU_LOOP(parallelism='[seq]') - do i = chemxb, chemxe - h_l(i - chemxb + 1) = h_l(i - chemxb + 1)*gas_constant*T_L/molecular_weights(i - chemxb + 1) - h_r(i - chemxb + 1) = h_r(i - chemxb + 1)*gas_constant*T_R/molecular_weights(i - chemxb + 1) - Xs_cell(i - chemxb + 1) = 0.5_wp*(Xs_L(i - chemxb + 1) + Xs_R(i - chemxb + 1)) - h_k(i - chemxb + 1) = 0.5_wp*(h_l(i - chemxb + 1) + h_r(i - chemxb + 1)) - dXk_dxi(i - chemxb + 1) = (Xs_R(i - chemxb + 1) - Xs_L(i - chemxb + 1))/grid_spacing - end do - - ! Calculate mixture-averaged diffusivities - $:GPU_LOOP(parallelism='[seq]') - do i = chemxb, chemxe - mass_diffusivities_mixavg_Cell(i - chemxb + 1) = & - (mass_diffusivities_mixavg2(i - chemxb + 1) + mass_diffusivities_mixavg1(i - chemxb + 1))/2.0_wp - end do - - lambda_Cell = 0.5_wp*(lambda_R + lambda_L) - - ! Calculate mass diffusion fluxes - rho_Vic = 0.0_wp - Mass_Diffu_Energy = 0.0_wp - - $:GPU_LOOP(parallelism='[seq]') - do eqn = chemxb, chemxe - Mass_Diffu_Flux(eqn - chemxb + 1) = rho_cell*mass_diffusivities_mixavg_Cell(eqn - chemxb + 1)* & - molecular_weights(eqn - chemxb + 1)/MW_cell*dXk_dxi(eqn - chemxb + 1) - rho_Vic = rho_Vic + Mass_Diffu_Flux(eqn - chemxb + 1) - Mass_Diffu_Energy = Mass_Diffu_Energy + h_k(eqn - chemxb + 1)*Mass_Diffu_Flux(eqn - chemxb + 1) - end do - - ! Apply corrections for mass conservation - $:GPU_LOOP(parallelism='[seq]') - do eqn = chemxb, chemxe - Mass_Diffu_Energy = Mass_Diffu_Energy - h_k(eqn - chemxb + 1)*Ys_cell(eqn - chemxb + 1)*rho_Vic - Mass_Diffu_Flux(eqn - chemxb + 1) = Mass_Diffu_Flux(eqn - chemxb + 1) - rho_Vic*Ys_cell(eqn - chemxb + 1) - end do - - ! Add thermal conduction contribution - Mass_Diffu_Energy = lambda_Cell*dT_dxi + Mass_Diffu_Energy - - ! Update flux arrays - flux_src_vf(E_idx)%sf(x, y, z) = flux_src_vf(E_idx)%sf(x, y, z) - Mass_Diffu_Energy - - $:GPU_LOOP(parallelism='[seq]') - do eqn = chemxb, chemxe - flux_src_vf(eqn)%sf(x, y, z) = flux_src_vf(eqn)%sf(x, y, z) - Mass_Diffu_Flux(eqn - chemxb + 1) - end do + ! Note: Added 'i' and 'eqn' to private list. + $:GPU_PARALLEL_LOOP(collapse=3, private='[x,y,z,i,eqn,Ys_L, Ys_R, Ys_cell, Xs_L, Xs_R, mass_diffusivities_mixavg1, mass_diffusivities_mixavg2, mass_diffusivities_mixavg_Cell, h_l, h_r, Xs_cell, h_k, dXk_dxi,Mass_Diffu_Flux, Mass_Diffu_Energy, MW_L, MW_R, MW_cell, Rgas_L, Rgas_R, T_L, T_R, P_L, P_R, rho_L, rho_R, rho_cell, rho_Vic, lambda_L, lambda_R, lambda_Cell, dT_dxi, grid_spacing]', copyin='[offsets]') + do z = isc3%beg, isc3%end + do y = isc2%beg, isc2%end + do x = isc1%beg, isc1%end + ! Calculate grid spacing using direction-based indexing + select case (idir) + case (1) + grid_spacing = x_cc(x + 1) - x_cc(x) + case (2) + grid_spacing = y_cc(y + 1) - y_cc(y) + case (3) + grid_spacing = z_cc(z + 1) - z_cc(z) + end select + + ! Extract species mass fractions + $:GPU_LOOP(parallelism='[seq]') + do i = chemxb, chemxe + Ys_L(i - chemxb + 1) = q_prim_qp(i)%sf(x, y, z) + Ys_R(i - chemxb + 1) = q_prim_qp(i)%sf(x + offsets(1), y + offsets(2), z + offsets(3)) + Ys_cell(i - chemxb + 1) = 0.5_wp*(Ys_L(i - chemxb + 1) + Ys_R(i - chemxb + 1)) + end do + + ! Calculate molecular weights and mole fractions + call get_mixture_molecular_weight(Ys_L, MW_L) + call get_mixture_molecular_weight(Ys_R, MW_R) + MW_cell = 0.5_wp*(MW_L + MW_R) + + call get_mole_fractions(MW_L, Ys_L, Xs_L) + call get_mole_fractions(MW_R, Ys_R, Xs_R) + + ! Calculate gas constants and thermodynamic properties + Rgas_L = gas_constant/MW_L + Rgas_R = gas_constant/MW_R + + P_L = q_prim_qp(E_idx)%sf(x, y, z) + P_R = q_prim_qp(E_idx)%sf(x + offsets(1), y + offsets(2), z + offsets(3)) + + rho_L = q_prim_qp(1)%sf(x, y, z) + rho_R = q_prim_qp(1)%sf(x + offsets(1), y + offsets(2), z + offsets(3)) + + T_L = P_L/rho_L/Rgas_L + T_R = P_R/rho_R/Rgas_R + + rho_cell = 0.5_wp*(rho_L + rho_R) + dT_dxi = (T_R - T_L)/grid_spacing + + ! Get transport properties + call get_species_mass_diffusivities_mixavg(P_L, T_L, Ys_L, mass_diffusivities_mixavg1) + call get_species_mass_diffusivities_mixavg(P_R, T_R, Ys_R, mass_diffusivities_mixavg2) + + call get_mixture_thermal_conductivity_mixavg(T_L, Ys_L, lambda_L) + call get_mixture_thermal_conductivity_mixavg(T_R, Ys_R, lambda_R) + + call get_species_enthalpies_rt(T_L, h_l) + call get_species_enthalpies_rt(T_R, h_r) + + ! Calculate species properties and gradients + $:GPU_LOOP(parallelism='[seq]') + do i = chemxb, chemxe + #:if USING_AMD + h_l(i - chemxb + 1) = h_l(i - chemxb + 1)*gas_constant*T_L/molecular_weights_nonparameter(i - chemxb + 1) + h_r(i - chemxb + 1) = h_r(i - chemxb + 1)*gas_constant*T_R/molecular_weights_nonparameter(i - chemxb + 1) + #:else + h_l(i - chemxb + 1) = h_l(i - chemxb + 1)*gas_constant*T_L/molecular_weights(i - chemxb + 1) + h_r(i - chemxb + 1) = h_r(i - chemxb + 1)*gas_constant*T_R/molecular_weights(i - chemxb + 1) + #:endif + Xs_cell(i - chemxb + 1) = 0.5_wp*(Xs_L(i - chemxb + 1) + Xs_R(i - chemxb + 1)) + h_k(i - chemxb + 1) = 0.5_wp*(h_l(i - chemxb + 1) + h_r(i - chemxb + 1)) + dXk_dxi(i - chemxb + 1) = (Xs_R(i - chemxb + 1) - Xs_L(i - chemxb + 1))/grid_spacing + end do + + ! Calculate mixture-averaged diffusivities + $:GPU_LOOP(parallelism='[seq]') + do i = chemxb, chemxe + mass_diffusivities_mixavg_Cell(i - chemxb + 1) = & + (mass_diffusivities_mixavg2(i - chemxb + 1) + mass_diffusivities_mixavg1(i - chemxb + 1))/2.0_wp + end do + + lambda_Cell = 0.5_wp*(lambda_R + lambda_L) + + ! Calculate mass diffusion fluxes + rho_Vic = 0.0_wp + Mass_Diffu_Energy = 0.0_wp + + $:GPU_LOOP(parallelism='[seq]') + do eqn = chemxb, chemxe + #:if USING_AMD + Mass_Diffu_Flux(eqn - chemxb + 1) = rho_cell*mass_diffusivities_mixavg_Cell(eqn - chemxb + 1)* & + molecular_weights_nonparameter(eqn - chemxb + 1)/MW_cell*dXk_dxi(eqn - chemxb + 1) + #:else + Mass_Diffu_Flux(eqn - chemxb + 1) = rho_cell*mass_diffusivities_mixavg_Cell(eqn - chemxb + 1)* & + molecular_weights(eqn - chemxb + 1)/MW_cell*dXk_dxi(eqn - chemxb + 1) + #:endif + rho_Vic = rho_Vic + Mass_Diffu_Flux(eqn - chemxb + 1) + Mass_Diffu_Energy = Mass_Diffu_Energy + h_k(eqn - chemxb + 1)*Mass_Diffu_Flux(eqn - chemxb + 1) + end do + + ! Apply corrections for mass conservation + $:GPU_LOOP(parallelism='[seq]') + do eqn = chemxb, chemxe + Mass_Diffu_Energy = Mass_Diffu_Energy - h_k(eqn - chemxb + 1)*Ys_cell(eqn - chemxb + 1)*rho_Vic + Mass_Diffu_Flux(eqn - chemxb + 1) = Mass_Diffu_Flux(eqn - chemxb + 1) - rho_Vic*Ys_cell(eqn - chemxb + 1) + end do + + ! Add thermal conduction contribution + Mass_Diffu_Energy = lambda_Cell*dT_dxi + Mass_Diffu_Energy + + ! Update flux arrays + flux_src_vf(E_idx)%sf(x, y, z) = flux_src_vf(E_idx)%sf(x, y, z) - Mass_Diffu_Energy + + $:GPU_LOOP(parallelism='[seq]') + do eqn = chemxb, chemxe + flux_src_vf(eqn)%sf(x, y, z) = flux_src_vf(eqn)%sf(x, y, z) - Mass_Diffu_Flux(eqn - chemxb + 1) end do end do end do - $:END_GPU_PARALLEL_LOOP() - #:endblock UNDEF_AMD + end do + $:END_GPU_PARALLEL_LOOP() ! Model 2: Unity Lewis Number else if (chem_params%transport_model == 2) then - #:block UNDEF_AMD - ! Note: Added ALL scalars and 'i'/'eqn' to private list to prevent race conditions. - $:GPU_PARALLEL_LOOP(collapse=3, private='[x,y,z,i,eqn,Ys_L, Ys_R, Ys_cell, dYk_dxi, Mass_Diffu_Flux, grid_spacing, MW_L, MW_R, MW_cell, Rgas_L, Rgas_R, P_L, P_R, rho_L, rho_R, rho_cell, T_L, T_R, Cp_L, Cp_R, hmix_L, hmix_R, dh_dxi, lambda_L, lambda_R, lambda_Cell, diffusivity_L, diffusivity_R, diffusivity_cell, Mass_Diffu_Energy]', copyin='[offsets]') - do z = isc3%beg, isc3%end - do y = isc2%beg, isc2%end - do x = isc1%beg, isc1%end - ! Calculate grid spacing using direction-based indexing - select case (idir) - case (1) - grid_spacing = x_cc(x + 1) - x_cc(x) - case (2) - grid_spacing = y_cc(y + 1) - y_cc(y) - case (3) - grid_spacing = z_cc(z + 1) - z_cc(z) - end select - - ! Extract species mass fractions - $:GPU_LOOP(parallelism='[seq]') - do i = chemxb, chemxe - Ys_L(i - chemxb + 1) = q_prim_qp(i)%sf(x, y, z) - Ys_R(i - chemxb + 1) = q_prim_qp(i)%sf(x + offsets(1), y + offsets(2), z + offsets(3)) - Ys_cell(i - chemxb + 1) = 0.5_wp*(Ys_L(i - chemxb + 1) + Ys_R(i - chemxb + 1)) - end do - - ! Calculate molecular weights and mole fractions - call get_mixture_molecular_weight(Ys_L, MW_L) - call get_mixture_molecular_weight(Ys_R, MW_R) - MW_cell = 0.5_wp*(MW_L + MW_R) - - ! Calculate gas constants and thermodynamic properties - Rgas_L = gas_constant/MW_L - Rgas_R = gas_constant/MW_R - - P_L = q_prim_qp(E_idx)%sf(x, y, z) - P_R = q_prim_qp(E_idx)%sf(x + offsets(1), y + offsets(2), z + offsets(3)) - - rho_L = q_prim_qp(1)%sf(x, y, z) - rho_R = q_prim_qp(1)%sf(x + offsets(1), y + offsets(2), z + offsets(3)) - - T_L = P_L/rho_L/Rgas_L - T_R = P_R/rho_R/Rgas_R - - rho_cell = 0.5_wp*(rho_L + rho_R) - - call get_mixture_specific_heat_cp_mass(T_L, Ys_L, Cp_L) - call get_mixture_specific_heat_cp_mass(T_R, Ys_R, Cp_R) - call get_mixture_enthalpy_mass(T_L, Ys_L, hmix_L) - call get_mixture_enthalpy_mass(T_R, Ys_R, hmix_R) - dh_dxi = (hmix_R - hmix_L)/grid_spacing - - ! Get transport properties - call get_mixture_thermal_conductivity_mixavg(T_L, Ys_L, lambda_L) - call get_mixture_thermal_conductivity_mixavg(T_R, Ys_R, lambda_R) - - ! Calculate species properties and gradients - $:GPU_LOOP(parallelism='[seq]') - do i = chemxb, chemxe - dYk_dxi(i - chemxb + 1) = (Ys_R(i - chemxb + 1) - & - Ys_L(i - chemxb + 1))/grid_spacing - end do - - ! Calculate mixture-averaged diffusivities - diffusivity_L = lambda_L/rho_L/Cp_L - diffusivity_R = lambda_R/rho_R/Cp_R - - lambda_Cell = 0.5_wp*(lambda_R + lambda_L) - diffusivity_cell = 0.5_wp*(diffusivity_R + diffusivity_L) - - ! Calculate mass diffusion fluxes - Mass_Diffu_Energy = 0.0_wp - - $:GPU_LOOP(parallelism='[seq]') - do eqn = chemxb, chemxe - Mass_Diffu_Flux(eqn - chemxb + 1) = rho_cell* & - diffusivity_cell* & - dYk_dxi(eqn - chemxb + 1) - end do - Mass_Diffu_Energy = rho_cell*diffusivity_cell*dh_dxi - - ! Update flux arrays - flux_src_vf(E_idx)%sf(x, y, z) = flux_src_vf(E_idx)%sf(x, y, z) - Mass_Diffu_Energy - - $:GPU_LOOP(parallelism='[seq]') - do eqn = chemxb, chemxe - flux_src_vf(eqn)%sf(x, y, z) = flux_src_vf(eqn)%sf(x, y, z) - Mass_Diffu_Flux(eqn - chemxb + 1) - end do + ! Note: Added ALL scalars and 'i'/'eqn' to private list to prevent race conditions. + $:GPU_PARALLEL_LOOP(collapse=3, private='[x,y,z,i,eqn,Ys_L, Ys_R, Ys_cell, dYk_dxi, Mass_Diffu_Flux, grid_spacing, MW_L, MW_R, MW_cell, Rgas_L, Rgas_R, P_L, P_R, rho_L, rho_R, rho_cell, T_L, T_R, Cp_L, Cp_R, hmix_L, hmix_R, dh_dxi, lambda_L, lambda_R, lambda_Cell, diffusivity_L, diffusivity_R, diffusivity_cell, Mass_Diffu_Energy]', copyin='[offsets]') + do z = isc3%beg, isc3%end + do y = isc2%beg, isc2%end + do x = isc1%beg, isc1%end + ! Calculate grid spacing using direction-based indexing + select case (idir) + case (1) + grid_spacing = x_cc(x + 1) - x_cc(x) + case (2) + grid_spacing = y_cc(y + 1) - y_cc(y) + case (3) + grid_spacing = z_cc(z + 1) - z_cc(z) + end select + + ! Extract species mass fractions + $:GPU_LOOP(parallelism='[seq]') + do i = chemxb, chemxe + Ys_L(i - chemxb + 1) = q_prim_qp(i)%sf(x, y, z) + Ys_R(i - chemxb + 1) = q_prim_qp(i)%sf(x + offsets(1), y + offsets(2), z + offsets(3)) + Ys_cell(i - chemxb + 1) = 0.5_wp*(Ys_L(i - chemxb + 1) + Ys_R(i - chemxb + 1)) + end do + + ! Calculate molecular weights and mole fractions + call get_mixture_molecular_weight(Ys_L, MW_L) + call get_mixture_molecular_weight(Ys_R, MW_R) + MW_cell = 0.5_wp*(MW_L + MW_R) + + ! Calculate gas constants and thermodynamic properties + Rgas_L = gas_constant/MW_L + Rgas_R = gas_constant/MW_R + + P_L = q_prim_qp(E_idx)%sf(x, y, z) + P_R = q_prim_qp(E_idx)%sf(x + offsets(1), y + offsets(2), z + offsets(3)) + + rho_L = q_prim_qp(1)%sf(x, y, z) + rho_R = q_prim_qp(1)%sf(x + offsets(1), y + offsets(2), z + offsets(3)) + + T_L = P_L/rho_L/Rgas_L + T_R = P_R/rho_R/Rgas_R + + rho_cell = 0.5_wp*(rho_L + rho_R) + + call get_mixture_specific_heat_cp_mass(T_L, Ys_L, Cp_L) + call get_mixture_specific_heat_cp_mass(T_R, Ys_R, Cp_R) + call get_mixture_enthalpy_mass(T_L, Ys_L, hmix_L) + call get_mixture_enthalpy_mass(T_R, Ys_R, hmix_R) + dh_dxi = (hmix_R - hmix_L)/grid_spacing + + ! Get transport properties + call get_mixture_thermal_conductivity_mixavg(T_L, Ys_L, lambda_L) + call get_mixture_thermal_conductivity_mixavg(T_R, Ys_R, lambda_R) + + ! Calculate species properties and gradients + $:GPU_LOOP(parallelism='[seq]') + do i = chemxb, chemxe + dYk_dxi(i - chemxb + 1) = (Ys_R(i - chemxb + 1) - & + Ys_L(i - chemxb + 1))/grid_spacing + end do + + ! Calculate mixture-averaged diffusivities + diffusivity_L = lambda_L/rho_L/Cp_L + diffusivity_R = lambda_R/rho_R/Cp_R + + lambda_Cell = 0.5_wp*(lambda_R + lambda_L) + diffusivity_cell = 0.5_wp*(diffusivity_R + diffusivity_L) + + ! Calculate mass diffusion fluxes + Mass_Diffu_Energy = 0.0_wp + + $:GPU_LOOP(parallelism='[seq]') + do eqn = chemxb, chemxe + Mass_Diffu_Flux(eqn - chemxb + 1) = rho_cell* & + diffusivity_cell* & + dYk_dxi(eqn - chemxb + 1) + end do + Mass_Diffu_Energy = rho_cell*diffusivity_cell*dh_dxi + + ! Update flux arrays + flux_src_vf(E_idx)%sf(x, y, z) = flux_src_vf(E_idx)%sf(x, y, z) - Mass_Diffu_Energy + + $:GPU_LOOP(parallelism='[seq]') + do eqn = chemxb, chemxe + flux_src_vf(eqn)%sf(x, y, z) = flux_src_vf(eqn)%sf(x, y, z) - Mass_Diffu_Flux(eqn - chemxb + 1) end do end do end do - $:END_GPU_PARALLEL_LOOP() - #:endblock UNDEF_AMD + end do + $:END_GPU_PARALLEL_LOOP() end if end if diff --git a/src/post_process/m_global_parameters.fpp b/src/post_process/m_global_parameters.fpp index 39e5c7800c..e4585d011a 100644 --- a/src/post_process/m_global_parameters.fpp +++ b/src/post_process/m_global_parameters.fpp @@ -245,6 +245,7 @@ module m_global_parameters logical :: E_wrt logical, dimension(num_fluids_max) :: alpha_rho_e_wrt logical :: fft_wrt + logical :: dummy logical :: pres_wrt logical, dimension(num_fluids_max) :: alpha_wrt logical :: gamma_wrt @@ -475,6 +476,7 @@ contains file_per_process = .false. E_wrt = .false. fft_wrt = .false. + dummy = .false. pres_wrt = .false. alpha_wrt = .false. gamma_wrt = .false. diff --git a/src/pre_process/m_global_parameters.fpp b/src/pre_process/m_global_parameters.fpp index ea679ba6f5..8380f5bb02 100644 --- a/src/pre_process/m_global_parameters.fpp +++ b/src/pre_process/m_global_parameters.fpp @@ -300,6 +300,7 @@ module m_global_parameters !! to the next time-step. logical :: fft_wrt + logical :: dummy contains @@ -409,6 +410,7 @@ contains elliptic_smoothing = .false. fft_wrt = .false. + dummy = .false. simplex_perturb = .false. simplex_params%perturb_vel(:) = .false. diff --git a/src/simulation/include/inline_capillary.fpp b/src/simulation/include/inline_capillary.fpp index ccd451f7c6..89e1aabaec 100644 --- a/src/simulation/include/inline_capillary.fpp +++ b/src/simulation/include/inline_capillary.fpp @@ -1,11 +1,12 @@ #:def compute_capillary_stress_tensor() Omega(1, 1) = -sigma*(w2*w2 + w3*w3)/normW + #:if not MFC_CASE_OPTIMIZATION or num_dims > 1 + Omega(2, 1) = sigma*w1*w2/normW + Omega(1, 2) = Omega(2, 1) - Omega(2, 1) = sigma*w1*w2/normW - Omega(1, 2) = Omega(2, 1) - - Omega(2, 2) = -sigma*(w1*w1 + w3*w3)/normW + Omega(2, 2) = -sigma*(w1*w1 + w3*w3)/normW + #:endif if (p > 0) then #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 diff --git a/src/simulation/m_cbc.fpp b/src/simulation/m_cbc.fpp index b5027b9fd1..0950072042 100644 --- a/src/simulation/m_cbc.fpp +++ b/src/simulation/m_cbc.fpp @@ -37,9 +37,9 @@ module m_cbc molecular_weights, get_species_specific_heats_r, & get_mole_fractions, get_species_specific_heats_r - #:block DEF_AMD + #:if USING_AMD use m_chemistry, only: molecular_weights_nonparameter - #:endblock DEF_AMD + #:endif implicit none private; public :: s_initialize_cbc_module, s_cbc, s_finalize_cbc_module @@ -700,7 +700,7 @@ contains if (cbc_dir == ${CBC_DIR}$ .and. recon_type == WENO_TYPE) then ! PI2 of flux_rs_vf and flux_src_rs_vf at j = 1/2 - if (weno_order == 3) then + if (weno_order == 3 .or. dummy) then call s_convert_primitive_to_flux_variables(q_prim_rs${XYZ}$_vf, & F_rs${XYZ}$_vf, & @@ -732,9 +732,10 @@ contains end do end do $:END_GPU_PARALLEL_LOOP() + end if ! PI4 of flux_rs_vf and flux_src_rs_vf at j = 1/2, 3/2 - else + if(weno_order == 5 .or. dummy) then call s_convert_primitive_to_flux_variables(q_prim_rs${XYZ}$_vf, & F_rs${XYZ}$_vf, & F_src_rs${XYZ}$_vf, & @@ -1059,15 +1060,14 @@ contains sum_Enthalpies = 0._wp $:GPU_LOOP(parallelism='[seq]') do i = 1, num_species - #:block UNDEF_AMD - h_k(i) = h_k(i)*gas_constant/molecular_weights(i)*T - sum_Enthalpies = sum_Enthalpies + (rho*h_k(i) - pres*Mw/molecular_weights(i)*Cp/R_gas)*dYs_dt(i) - #:endblock UNDEF_AMD - - #:block DEF_AMD + + #:if USING_AMD h_k(i) = h_k(i)*gas_constant/molecular_weights_nonparameter(i)*T sum_Enthalpies = sum_Enthalpies + (rho*h_k(i) - pres*Mw/molecular_weights_nonparameter(i)*Cp/R_gas)*dYs_dt(i) - #:endblock DEF_AMD + #:else + h_k(i) = h_k(i)*gas_constant/molecular_weights(i)*T + sum_Enthalpies = sum_Enthalpies + (rho*h_k(i) - pres*Mw/molecular_weights(i)*Cp/R_gas)*dYs_dt(i) + #:endif end do flux_rs${XYZ}$_vf_l(-1, k, r, E_idx) = flux_rs${XYZ}$_vf_l(0, k, r, E_idx) & + ds(0)*((E/rho + pres/rho)*drho_dt + rho*vel_dv_dt_sum + Cp*T*L(2)/(c*c) + sum_Enthalpies) diff --git a/src/simulation/m_data_output.fpp b/src/simulation/m_data_output.fpp index d2fee92764..d239532f43 100644 --- a/src/simulation/m_data_output.fpp +++ b/src/simulation/m_data_output.fpp @@ -2,6 +2,7 @@ !! @brief Contains module m_data_output #:include 'macros.fpp' +#:include 'case.fpp' !> @brief The primary purpose of this module is to output the grid and the !! conservative variables data at the chosen time-step interval. In @@ -267,10 +268,15 @@ contains integer, intent(in) :: t_step real(wp) :: rho !< Cell-avg. density +#:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(2) :: alpha !< Cell-avg. volume fraction + real(wp), dimension(3) :: vel !< Cell-avg. velocity +#:else + real(wp), dimension(num_fluids) :: alpha !< Cell-avg. volume fraction real(wp), dimension(num_vels) :: vel !< Cell-avg. velocity +#:endif real(wp) :: vel_sum !< Cell-avg. velocity sum real(wp) :: pres !< Cell-avg. pressure - real(wp), dimension(num_fluids) :: alpha !< Cell-avg. volume fraction real(wp) :: gamma !< Cell-avg. sp. heat ratio real(wp) :: pi_inf !< Cell-avg. liquid stiffness function real(wp) :: qv !< Cell-avg. internal energy reference value @@ -320,7 +326,7 @@ contains #:call GPU_PARALLEL(copyout='[icfl_max_loc]', copyin='[icfl_sf]') icfl_max_loc = maxval(icfl_sf) #:endcall GPU_PARALLEL - if (viscous) then + if (viscous .or. dummy) then #:call GPU_PARALLEL(copyout='[vcfl_max_loc, Rc_min_loc]', copyin='[vcfl_sf,Rc_sf]') vcfl_max_loc = maxval(vcfl_sf) Rc_min_loc = minval(Rc_sf) @@ -1567,6 +1573,7 @@ contains end if elseif (p == 0) then if (bubbles_euler) then + #:if not MFC_CASE_OPTIMIZATION or num_dims > 1 write (i + 30, '(6X,10F24.8)') & nondim_time, & rho, & @@ -1578,7 +1585,9 @@ contains nRdot(1), & R(1), & Rdot(1) + #:endif else if (elasticity) then + #:if not MFC_CASE_OPTIMIZATION or num_dims > 1 write (i + 30, '(6X,F12.6,F24.8,F24.8,F24.8,F24.8,'// & 'F24.8,F24.8,F24.8)') & nondim_time, & @@ -1589,6 +1598,7 @@ contains tau_e(1), & tau_e(2), & tau_e(3) + #:endif else write (i + 30, '(6X,F12.6,F24.8,F24.8,F24.8)') & nondim_time, & @@ -1598,6 +1608,7 @@ contains print *, 'time =', nondim_time, 'rho =', rho, 'pres =', pres end if else + #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 write (i + 30, '(6X,F12.6,F24.8,F24.8,F24.8,F24.8,'// & 'F24.8,F24.8,F24.8,F24.8,F24.8,'// & 'F24.8)') & @@ -1612,6 +1623,7 @@ contains qv, & c, & accel + #:endif end if end if end do diff --git a/src/simulation/m_global_parameters.fpp b/src/simulation/m_global_parameters.fpp index 50967956ae..122133f844 100644 --- a/src/simulation/m_global_parameters.fpp +++ b/src/simulation/m_global_parameters.fpp @@ -543,6 +543,7 @@ module m_global_parameters $:GPU_DECLARE(create='[Bx0,powell]') logical :: fft_wrt + logical :: dummy !> @name Continuum damage model parameters !> @{! @@ -761,6 +762,7 @@ contains #:endfor fft_wrt = .false. + dummy = .false. do j = 1, num_probes_max acoustic(j)%pulse = dflt_int diff --git a/src/simulation/m_hyperelastic.fpp b/src/simulation/m_hyperelastic.fpp index 21625069b2..76e0c74841 100644 --- a/src/simulation/m_hyperelastic.fpp +++ b/src/simulation/m_hyperelastic.fpp @@ -106,7 +106,7 @@ contains real(wp) :: G_local integer :: j, k, l, i, r - $:GPU_PARALLEL_LOOP(collapse=3, private='[i,j,k,l,alpha_K, alpha_rho_K, rho, gamma, pi_inf, qv, G_local, Re, tensora, tensorb, i]') + $:GPU_PARALLEL_LOOP(collapse=3, private='[i,j,k,l,alpha_K, alpha_rho_K, rho, gamma, pi_inf, qv, G_local, Re, tensora, tensorb]') do l = 0, p do k = 0, n do j = 0, m diff --git a/src/simulation/m_igr.fpp b/src/simulation/m_igr.fpp index 60555b554b..e99c9a6401 100644 --- a/src/simulation/m_igr.fpp +++ b/src/simulation/m_igr.fpp @@ -356,7 +356,7 @@ contains call s_populate_F_igr_buffers(bc_type, jac_sf) - if (igr_iter_solver == 1) then ! Jacobi iteration + if (igr_iter_solver == 1 .or. dummy) then ! Jacobi iteration $:GPU_PARALLEL_LOOP(private='[j,k,l]', collapse=3) do l = idwbuff(3)%beg, idwbuff(3)%end do k = idwbuff(2)%beg, idwbuff(2)%end @@ -487,6 +487,7 @@ contains if (idir == 1) then if (p == 0) then + #:if not MFC_CASE_OPTIMIZATION or num_dims > 1 $:GPU_PARALLEL_LOOP(collapse=3, private='[j,k,l,rho_L, rho_R, gamma_L, gamma_R, pi_inf_L, pi_inf_R, mu_L, mu_R, vel_L, vel_R, pres_L, pres_R, alpha_L, alpha_R, alpha_rho_L, alpha_rho_R, F_L, F_R, E_L, E_R, cfl, dvel, dvel_small, rho_sf_small, vflux_L_arr, vflux_R_arr]') do l = 0, p do k = 0, n @@ -901,6 +902,7 @@ contains end do end do $:END_GPU_PARALLEL_LOOP() + #:endif else #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 $:GPU_PARALLEL_LOOP(collapse=3, private='[j,k,l,rho_L, rho_R, gamma_L, gamma_R, pi_inf_L, pi_inf_R, mu_L, mu_R, vel_L, vel_R, pres_L, pres_R, alpha_L, alpha_R, alpha_rho_L, alpha_rho_R, F_L, F_R, E_L, E_R, cfl, dvel, dvel_small, rho_sf_small, vflux_L_arr, vflux_R_arr]') @@ -1418,6 +1420,7 @@ contains end if else if (idir == 2) then if (p == 0) then + #:if not MFC_CASE_OPTIMIZATION or num_dims > 1 $:GPU_PARALLEL_LOOP(collapse=3, private='[j,k,l,rho_L, rho_R, gamma_L, gamma_R, pi_inf_L, pi_inf_R, mu_L, mu_R, vel_L, vel_R, pres_L, pres_R, alpha_L, alpha_R, alpha_rho_L, alpha_rho_R, F_L, F_R, E_L, E_R, cfl, dvel_small, rho_sf_small, vflux_L_arr, vflux_R_arr]') do l = 0, p do k = -1, n @@ -1812,6 +1815,7 @@ contains end do end do $:END_GPU_PARALLEL_LOOP() + #:endif else #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 $:GPU_PARALLEL_LOOP(collapse=3, private='[j,k,l,rho_L, rho_R, gamma_L, gamma_R, pi_inf_L, pi_inf_R, mu_L, mu_R, vel_L, vel_R, pres_L, pres_R, alpha_L, alpha_R, alpha_rho_L, alpha_rho_R, F_L, F_R, E_L, E_R, cfl, dvel_small, rho_sf_small, vflux_L_arr, vflux_R_arr]') diff --git a/src/simulation/m_muscl.fpp b/src/simulation/m_muscl.fpp index c9d5b4bff0..91e32a7c6a 100644 --- a/src/simulation/m_muscl.fpp +++ b/src/simulation/m_muscl.fpp @@ -112,11 +112,11 @@ contains $:GPU_UPDATE(device='[is1_muscl,is2_muscl,is3_muscl]') - if (muscl_order /= 1) then + if (muscl_order /= 1 .or. dummy) then call s_initialize_muscl(v_vf, muscl_dir) end if - if (muscl_order == 1) then + if (muscl_order == 1 .or. dummy) then if (muscl_dir == 1) then $:GPU_PARALLEL_LOOP(collapse=4) do i = 1, ubound(v_vf, 1) @@ -157,8 +157,9 @@ contains end do $:END_GPU_PARALLEL_LOOP() end if + end if - else if (muscl_order == 2) then + if (muscl_order == 2 .or. dummy) then ! MUSCL Reconstruction #:for MUSCL_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] if (muscl_dir == ${MUSCL_DIR}$) then diff --git a/src/simulation/m_rhs.fpp b/src/simulation/m_rhs.fpp index 2882a26616..1c8eda1c30 100644 --- a/src/simulation/m_rhs.fpp +++ b/src/simulation/m_rhs.fpp @@ -659,7 +659,7 @@ contains call cpu_time(t_start) - if (.not. igr) then + if (.not. igr .or. dummy) then ! Association/Population of Working Variables $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) do i = 1, sys_size @@ -697,11 +697,12 @@ contains end if end if - if (igr) then + if (igr .or. dummy) then call nvtxStartRange("RHS-COMMUNICATION") call s_populate_variables_buffers(bc_type, q_cons_vf, pb_in, mv_in) call nvtxEndRange - else + end if + if(.not. igr .or. dummy) then call nvtxStartRange("RHS-CONVERT") call s_convert_conservative_to_primitive_variables( & q_cons_qp%vf, & @@ -727,7 +728,7 @@ contains if (qbmm) call s_mom_inv(q_cons_qp%vf, q_prim_qp%vf, mom_sp, mom_3d, pb_in, rhs_pb, mv_in, rhs_mv, idwbuff(1), idwbuff(2), idwbuff(3)) - if (viscous .and. .not. igr) then + if ((viscous .and. .not. igr) .or. dummy) then call nvtxStartRange("RHS-VISCOUS") call s_get_viscous(qL_rsx_vf, qL_rsy_vf, qL_rsz_vf, & dqL_prim_dx_n, dqL_prim_dy_n, dqL_prim_dz_n, & @@ -750,7 +751,7 @@ contains ! Dimensional Splitting Loop do id = 1, num_dims - if (igr) then + if (igr .or. dummy) then if (id == 1) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) @@ -779,8 +780,8 @@ contains call s_igr_sigma_x(q_cons_vf, rhs_vf) call nvtxEndRange end if - - else ! Finite volume solve + end if + if(.not. igr .or. dummy) then! Finite volume solve ! Reconstructing Primitive/Conservative Variables call nvtxStartRange("RHS-WENO") @@ -1055,7 +1056,7 @@ contains ! END: Additional pphysics and source terms if (run_time_info .or. probe_wrt .or. ib .or. bubbles_lagrange) then - if (.not. igr) then + if (.not. igr .or. dummy) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) do i = 1, sys_size do l = idwbuff(3)%beg, idwbuff(3)%end @@ -1632,7 +1633,7 @@ contains end if if (cyl_coord .and. ((bc_y%beg == -2) .or. (bc_y%beg == -14))) then - if (viscous) then + if (viscous .or. dummy) then if (p > 0) then call s_compute_viscous_stress_cylindrical_boundary(q_prim_vf, & dq_prim_dx_vf(mom_idx%beg:mom_idx%end), & @@ -1741,7 +1742,7 @@ contains end do $:END_GPU_PARALLEL_LOOP() - if (viscous) then + if (viscous .or. dummy) then $:GPU_PARALLEL_LOOP(private='[i,j,l]', collapse=2) do l = 0, p do j = 0, m @@ -1875,7 +1876,7 @@ contains integer :: i, j, k, l #:for SCHEME, TYPE in [('weno','WENO_TYPE'), ('muscl','MUSCL_TYPE')] - if (recon_type == ${TYPE}$) then + if (recon_type == ${TYPE}$ .or. dummy) then ! Reconstruction in s1-direction if (norm_dir == 1) then is1 = idwbuff(1); is2 = idwbuff(2); is3 = idwbuff(3) @@ -1930,7 +1931,7 @@ contains ! Reconstruction in s1-direction #:for SCHEME, TYPE in [('weno','WENO_TYPE'), ('muscl', 'MUSCL_TYPE')] - if (recon_type == ${TYPE}$) then + if (recon_type == ${TYPE}$ .or. dummy) then if (norm_dir == 1) then is1 = idwbuff(1); is2 = idwbuff(2); is3 = idwbuff(3) recon_dir = 1; is1%beg = is1%beg + ${SCHEME}$_polyn @@ -1949,49 +1950,49 @@ contains end if $:GPU_UPDATE(device='[is1,is2,is3,iv]') + end if + #:endfor - if (recon_dir == 1) then - $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) - do i = iv%beg, iv%end - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end - vL_x(j, k, l, i) = v_vf(i)%sf(j, k, l) - vR_x(j, k, l, i) = v_vf(i)%sf(j, k, l) - end do - end do + if (recon_dir == 1) then + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) + do i = iv%beg, iv%end + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end + vL_x(j, k, l, i) = v_vf(i)%sf(j, k, l) + vR_x(j, k, l, i) = v_vf(i)%sf(j, k, l) end do end do - $:END_GPU_PARALLEL_LOOP() - else if (recon_dir == 2) then - $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) - do i = iv%beg, iv%end - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end - vL_y(j, k, l, i) = v_vf(i)%sf(k, j, l) - vR_y(j, k, l, i) = v_vf(i)%sf(k, j, l) - end do - end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + else if (recon_dir == 2) then + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) + do i = iv%beg, iv%end + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end + vL_y(j, k, l, i) = v_vf(i)%sf(k, j, l) + vR_y(j, k, l, i) = v_vf(i)%sf(k, j, l) end do end do - $:END_GPU_PARALLEL_LOOP() - else if (recon_dir == 3) then - $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) - do i = iv%beg, iv%end - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end - vL_z(j, k, l, i) = v_vf(i)%sf(l, k, j) - vR_z(j, k, l, i) = v_vf(i)%sf(l, k, j) - end do - end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + else if (recon_dir == 3) then + $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) + do i = iv%beg, iv%end + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end + vL_z(j, k, l, i) = v_vf(i)%sf(l, k, j) + vR_z(j, k, l, i) = v_vf(i)%sf(l, k, j) end do end do - $:END_GPU_PARALLEL_LOOP() - end if - end if - #:endfor + end do + end do + $:END_GPU_PARALLEL_LOOP() + end if end subroutine s_reconstruct_cell_boundary_values_first_order diff --git a/src/simulation/m_riemann_solvers.fpp b/src/simulation/m_riemann_solvers.fpp index 7e1f67d2d4..fc5cefadaf 100644 --- a/src/simulation/m_riemann_solvers.fpp +++ b/src/simulation/m_riemann_solvers.fpp @@ -987,7 +987,7 @@ contains #:endfor - if (viscous) then + if (viscous .or. dummy) then if (weno_Re_flux) then call s_compute_viscous_source_flux( & @@ -1288,6 +1288,7 @@ contains H_L = (E_L + pres_L)/rho_L H_R = (E_R + pres_R)/rho_R elseif (mhd .and. relativity) then + #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 Ga%L = 1._wp/sqrt(1._wp - vel_L_rms) Ga%R = 1._wp/sqrt(1._wp - vel_R_rms) vdotB%L = vel_L(1)*B%L(1) + vel_L(2)*B%L(2) + vel_L(3)*B%L(3) @@ -1310,6 +1311,7 @@ contains E_L = rho_L*H_L*Ga%L**2 - pres_L + 0.5_wp*(B2%L + vel_L_rms*B2%L - vdotB%L**2._wp) - rho_L*Ga%L E_R = rho_R*H_R*Ga%R**2 - pres_R + 0.5_wp*(B2%R + vel_R_rms*B2%R - vdotB%R**2._wp) - rho_R*Ga%R + #:endif elseif (mhd .and. .not. relativity) then pres_mag%L = 0.5_wp*(B%L(1)**2._wp + B%L(2)**2._wp + B%L(3)**2._wp) pres_mag%R = 0.5_wp*(B%R(1)**2._wp + B%R(2)**2._wp + B%R(3)**2._wp) @@ -1496,11 +1498,13 @@ contains ! Energy if (mhd .and. (.not. relativity)) then ! energy flux = (E + p + p_mag) * v_${XYZ}$ - B_${XYZ}$ * (v_x*B_x + v_y*B_y + v_z*B_z) + #:if not MFC_CASE_OPTIMIZATION or num_dims > 1 flux_rs${XYZ}$_vf(j, k, l, E_idx) = & (s_M*(vel_R(norm_dir)*(E_R + pres_R + pres_mag%R) - B%R(norm_dir)*(vel_R(1)*B%R(1) + vel_R(2)*B%R(2) + vel_R(3)*B%R(3))) & - s_P*(vel_L(norm_dir)*(E_L + pres_L + pres_mag%L) - B%L(norm_dir)*(vel_L(1)*B%L(1) + vel_L(2)*B%L(2) + vel_L(3)*B%L(3))) & + s_M*s_P*(E_L - E_R)) & /(s_M - s_P) + #:endif elseif (mhd .and. relativity) then ! energy flux = m_${XYZ}$ - mass flux ! Hard-coded for single-component for now @@ -1648,7 +1652,7 @@ contains #:endfor - if (viscous) then + if (viscous .or. dummy) then $:GPU_PARALLEL_LOOP(collapse=3, private='[i,j,k,l, idx_right_phys, vel_grad_L, vel_grad_R, alpha_L, alpha_R, vel_L, vel_R, Re_L, Re_R]', copyin='[norm_dir]') do l = isz%beg, isz%end do k = isy%beg, isy%end @@ -1721,25 +1725,31 @@ contains do i = 1, num_dims vel_grad_L(i, 1) = (dqL_prim_dx_vf(momxb + i - 1)%sf(j, k, l)/Re_L(1)) vel_grad_R(i, 1) = (dqR_prim_dx_vf(momxb + i - 1)%sf(idx_right_phys(1), idx_right_phys(2), idx_right_phys(3))/Re_R(1)) + #:if not MFC_CASE_OPTIMIZATION or num_dims > 1 if (num_dims > 1) then vel_grad_L(i, 2) = (dqL_prim_dy_vf(momxb + i - 1)%sf(j, k, l)/Re_L(1)) vel_grad_R(i, 2) = (dqR_prim_dy_vf(momxb + i - 1)%sf(idx_right_phys(1), idx_right_phys(2), idx_right_phys(3))/Re_R(1)) end if + #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 if (num_dims > 2) then vel_grad_L(i, 3) = (dqL_prim_dz_vf(momxb + i - 1)%sf(j, k, l)/Re_L(1)) vel_grad_R(i, 3) = (dqR_prim_dz_vf(momxb + i - 1)%sf(idx_right_phys(1), idx_right_phys(2), idx_right_phys(3))/Re_R(1)) end if + #:endif + #:endif end do if (norm_dir == 1) then flux_src_vf(momxb)%sf(j, k, l) = flux_src_vf(momxb)%sf(j, k, l) - (4._wp/3._wp)*0.5_wp*(vel_grad_L(1, 1) + vel_grad_R(1, 1)) flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - (4._wp/3._wp)*0.5_wp*(vel_grad_L(1, 1)*vel_L(1) + vel_grad_R(1, 1)*vel_R(1)) + #:if not MFC_CASE_OPTIMIZATION or num_dims > 1 if (num_dims > 1) then flux_src_vf(momxb)%sf(j, k, l) = flux_src_vf(momxb)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(2, 2) + vel_grad_R(2, 2)) flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(2, 2)*vel_L(1) + vel_grad_R(2, 2)*vel_R(1)) flux_src_vf(momxb + 1)%sf(j, k, l) = flux_src_vf(momxb + 1)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 2) + vel_grad_R(1, 2)) - 0.5_wp*(vel_grad_L(2, 1) + vel_grad_R(2, 1)) flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 2)*vel_L(2) + vel_grad_R(1, 2)*vel_R(2)) - 0.5_wp*(vel_grad_L(2, 1)*vel_L(2) + vel_grad_R(2, 1)*vel_R(2)) + #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 if (num_dims > 2) then flux_src_vf(momxb)%sf(j, k, l) = flux_src_vf(momxb)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(3, 3) + vel_grad_R(3, 3)) flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(3, 3)*vel_L(1) + vel_grad_R(3, 3)*vel_R(1)) @@ -1747,9 +1757,12 @@ contains flux_src_vf(momxb + 2)%sf(j, k, l) = flux_src_vf(momxb + 2)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 3) + vel_grad_R(1, 3)) - 0.5_wp*(vel_grad_L(3, 1) + vel_grad_R(3, 1)) flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 3)*vel_L(3) + vel_grad_R(1, 3)*vel_R(3)) - 0.5_wp*(vel_grad_L(3, 1)*vel_L(3) + vel_grad_R(3, 1)*vel_R(3)) end if + #:endif end if + #:endif else if (norm_dir == 2) then + #:if not MFC_CASE_OPTIMIZATION or num_dims > 1 flux_src_vf(momxb + 1)%sf(j, k, l) = flux_src_vf(momxb + 1)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(1, 1) + vel_grad_R(1, 1)) flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(1, 1)*vel_L(2) + vel_grad_R(1, 1)*vel_R(2)) @@ -1758,6 +1771,7 @@ contains flux_src_vf(momxb)%sf(j, k, l) = flux_src_vf(momxb)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 2) + vel_grad_R(1, 2)) - 0.5_wp*(vel_grad_L(2, 1) + vel_grad_R(2, 1)) flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 2)*vel_L(1) + vel_grad_R(1, 2)*vel_R(1)) - 0.5_wp*(vel_grad_L(2, 1)*vel_L(1) + vel_grad_R(2, 1)*vel_R(1)) + #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 if (num_dims > 2) then flux_src_vf(momxb + 1)%sf(j, k, l) = flux_src_vf(momxb + 1)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(3, 3) + vel_grad_R(3, 3)) flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(3, 3)*vel_L(2) + vel_grad_R(3, 3)*vel_R(2)) @@ -1765,7 +1779,10 @@ contains flux_src_vf(momxb + 2)%sf(j, k, l) = flux_src_vf(momxb + 2)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 3) + vel_grad_R(2, 3)) - 0.5_wp*(vel_grad_L(3, 2) + vel_grad_R(3, 2)) flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 3)*vel_L(3) + vel_grad_R(2, 3)*vel_R(3)) - 0.5_wp*(vel_grad_L(3, 2)*vel_L(3) + vel_grad_R(3, 2)*vel_R(3)) end if + #:endif + #:endif else + #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 flux_src_vf(momxb + 2)%sf(j, k, l) = flux_src_vf(momxb + 2)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(1, 1) + vel_grad_R(1, 1)) flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(1, 1)*vel_L(3) + vel_grad_R(1, 1)*vel_R(3)) @@ -1780,7 +1797,7 @@ contains flux_src_vf(momxb + 1)%sf(j, k, l) = flux_src_vf(momxb + 1)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 3) + vel_grad_R(2, 3)) - 0.5_wp*(vel_grad_L(3, 2) + vel_grad_R(3, 2)) flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 3)*vel_L(2) + vel_grad_R(2, 3)*vel_R(2)) - 0.5_wp*(vel_grad_L(3, 2)*vel_L(2) + vel_grad_R(3, 2)*vel_R(2)) - + #:endif end if end if @@ -1790,41 +1807,54 @@ contains do i = 1, num_dims vel_grad_L(i, 1) = (dqL_prim_dx_vf(momxb + i - 1)%sf(j, k, l)/Re_L(2)) vel_grad_R(i, 1) = (dqR_prim_dx_vf(momxb + i - 1)%sf(idx_right_phys(1), idx_right_phys(2), idx_right_phys(3))/Re_R(2)) + #:if not MFC_CASE_OPTIMIZATION or num_dims > 1 if (num_dims > 1) then vel_grad_L(i, 2) = (dqL_prim_dy_vf(momxb + i - 1)%sf(j, k, l)/Re_L(2)) vel_grad_R(i, 2) = (dqR_prim_dy_vf(momxb + i - 1)%sf(idx_right_phys(1), idx_right_phys(2), idx_right_phys(3))/Re_R(2)) end if + #:endif + #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 if (num_dims > 2) then vel_grad_L(i, 3) = (dqL_prim_dz_vf(momxb + i - 1)%sf(j, k, l)/Re_L(2)) vel_grad_R(i, 3) = (dqR_prim_dz_vf(momxb + i - 1)%sf(idx_right_phys(1), idx_right_phys(2), idx_right_phys(3))/Re_R(2)) end if + #:endif end do if (norm_dir == 1) then flux_src_vf(momxb)%sf(j, k, l) = flux_src_vf(momxb)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 1) + vel_grad_R(1, 1)) flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 1)*vel_L(1) + vel_grad_R(1, 1)*vel_R(1)) + #:if not MFC_CASE_OPTIMIZATION or num_dims > 1 if (num_dims > 1) then flux_src_vf(momxb)%sf(j, k, l) = flux_src_vf(momxb)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 2) + vel_grad_R(2, 2)) flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 2)*vel_L(1) + vel_grad_R(2, 2)*vel_R(1)) + #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 if (num_dims > 2) then flux_src_vf(momxb)%sf(j, k, l) = flux_src_vf(momxb)%sf(j, k, l) - 0.5_wp*(vel_grad_L(3, 3) + vel_grad_R(3, 3)) flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(3, 3)*vel_L(1) + vel_grad_R(3, 3)*vel_R(1)) end if + #:endif end if + #:endif else if (norm_dir == 2) then + #:if not MFC_CASE_OPTIMIZATION or num_dims > 1 flux_src_vf(momxb + 1)%sf(j, k, l) = flux_src_vf(momxb + 1)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 1) + vel_grad_R(1, 1)) flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 1)*vel_L(2) + vel_grad_R(1, 1)*vel_R(2)) flux_src_vf(momxb + 1)%sf(j, k, l) = flux_src_vf(momxb + 1)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 2) + vel_grad_R(2, 2)) flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 2)*vel_L(2) + vel_grad_R(2, 2)*vel_R(2)) + #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 if (num_dims > 2) then flux_src_vf(momxb + 1)%sf(j, k, l) = flux_src_vf(momxb + 1)%sf(j, k, l) - 0.5_wp*(vel_grad_L(3, 3) + vel_grad_R(3, 3)) flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(3, 3)*vel_L(2) + vel_grad_R(3, 3)*vel_R(2)) end if + #:endif + #:endif else + #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 flux_src_vf(momxb + 2)%sf(j, k, l) = flux_src_vf(momxb + 2)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 1) + vel_grad_R(1, 1)) flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 1)*vel_L(3) + vel_grad_R(1, 1)*vel_R(3)) @@ -1833,7 +1863,7 @@ contains flux_src_vf(momxb + 2)%sf(j, k, l) = flux_src_vf(momxb + 2)%sf(j, k, l) - 0.5_wp*(vel_grad_L(3, 3) + vel_grad_R(3, 3)) flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(3, 3)*vel_L(3) + vel_grad_R(3, 3)*vel_R(3)) - + #:endif end if end if @@ -1912,13 +1942,20 @@ contains integer, intent(in) :: norm_dir type(int_bounds_info), intent(in) :: ix, iy, iz +#:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(2) :: alpha_rho_L, alpha_rho_R + real(wp), dimension(2) :: alpha_L, alpha_R + real(wp), dimension(3) :: vel_L, vel_R +#:else real(wp), dimension(num_fluids) :: alpha_rho_L, alpha_rho_R - real(wp) :: rho_L, rho_R + real(wp), dimension(num_fluids) :: alpha_L, alpha_R real(wp), dimension(num_dims) :: vel_L, vel_R +#:endif + + real(wp) :: rho_L, rho_R real(wp) :: pres_L, pres_R real(wp) :: E_L, E_R real(wp) :: H_L, H_R - real(wp), dimension(num_fluids) :: alpha_L, alpha_R real(wp), dimension(num_species) :: Ys_L, Ys_R, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Cp_iL, Cp_iR real(wp), dimension(num_species) :: Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2 real(wp) :: Cp_avg, Cv_avg, T_avg, c_sum_Yi_Phi, eps @@ -1945,11 +1982,17 @@ contains real(wp) :: xi_L, xi_R !< Left and right wave speeds functions real(wp) :: xi_M, xi_P real(wp) :: xi_MP, xi_PP - +#:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(3) :: R0_L, R0_R + real(wp), dimension(3) :: V0_L, V0_R + real(wp), dimension(3) :: P0_L, P0_R + real(wp), dimension(3) :: pbw_L, pbw_R +#:else real(wp), dimension(nb) :: R0_L, R0_R real(wp), dimension(nb) :: V0_L, V0_R real(wp), dimension(nb) :: P0_L, P0_R real(wp), dimension(nb) :: pbw_L, pbw_R +#:endif real(wp) :: alpha_L_sum, alpha_R_sum, nbub_L, nbub_R real(wp) :: ptilde_L, ptilde_R @@ -1959,7 +2002,11 @@ contains real(wp) :: R3V2Lbar, R3V2Rbar real(wp), dimension(6) :: tau_e_L, tau_e_R +#:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(3) :: xi_field_L, xi_field_R +#:else real(wp), dimension(num_dims) :: xi_field_L, xi_field_R +#:endif real(wp) :: G_L, G_R real(wp) :: vel_L_rms, vel_R_rms, vel_avg_rms @@ -3564,7 +3611,7 @@ contains #:endfor ! Computing HLLC flux and source flux for Euler system of equations - if (viscous) then + if (viscous .or. dummy) then if (weno_Re_flux) then call s_compute_viscous_source_flux( & qL_prim_vf(momxb:momxe), & @@ -3671,181 +3718,183 @@ contains #:for NORM_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] if (norm_dir == ${NORM_DIR}$) then - #:block UNDEF_AMD - $:GPU_PARALLEL_LOOP(collapse=3, private='[alpha_rho_L, alpha_rho_R, vel, alpha_L, alpha_R, rho, pres,E, H_no_mag, gamma, pi_inf, qv, vel_rms, B, c, c_fast, pres_mag, U_L, U_R, U_starL, U_starR, U_doubleL, U_doubleR, F_L, F_R, F_starL, F_starR, F_hlld, s_L, s_R, s_M, s_starL, s_starR, pTot_L, pTot_R, p_star, rhoL_star, rhoR_star, E_starL, E_starR, sqrt_rhoL_star, sqrt_rhoR_star, denom_ds, sign_Bx, vL_star, vR_star, wL_star, wR_star, v_double, w_double, By_double, Bz_double, E_doubleL, E_doubleR, E_double]', copyin='[norm_dir]') - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end + $:GPU_PARALLEL_LOOP(collapse=3, private='[alpha_rho_L, alpha_rho_R, vel, alpha_L, alpha_R, rho, pres,E, H_no_mag, gamma, pi_inf, qv, vel_rms, B, c, c_fast, pres_mag, U_L, U_R, U_starL, U_starR, U_doubleL, U_doubleR, F_L, F_R, F_starL, F_starR, F_hlld, s_L, s_R, s_M, s_starL, s_starR, pTot_L, pTot_R, p_star, rhoL_star, rhoR_star, E_starL, E_starR, sqrt_rhoL_star, sqrt_rhoR_star, denom_ds, sign_Bx, vL_star, vR_star, wL_star, wR_star, v_double, w_double, By_double, Bz_double, E_doubleL, E_doubleR, E_double]', copyin='[norm_dir]') + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end - ! (1) Extract the left/right primitive states - do i = 1, contxe - alpha_rho_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, i) - alpha_rho_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) - end do + ! (1) Extract the left/right primitive states + do i = 1, contxe + alpha_rho_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, i) + alpha_rho_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, i) + end do - ! NOTE: unlike HLL & HLLC, vel_L here is permutated by dir_idx for simpler logic - do i = 1, num_vels - vel%L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) - vel%R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, contxe + dir_idx(i)) - end do + ! NOTE: unlike HLL & HLLC, vel_L here is permutated by dir_idx for simpler logic + do i = 1, num_vels + vel%L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(i)) + vel%R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, contxe + dir_idx(i)) + end do - vel_rms%L = sum(vel%L**2._wp) - vel_rms%R = sum(vel%R**2._wp) + vel_rms%L = sum(vel%L**2._wp) + vel_rms%R = sum(vel%R**2._wp) - do i = 1, num_fluids - alpha_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) - alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) - end do + do i = 1, num_fluids + alpha_L(i) = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx + i) + alpha_R(i) = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx + i) + end do - pres%L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx) - pres%R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx) + pres%L = qL_prim_rs${XYZ}$_vf(j, k, l, E_idx) + pres%R = qR_prim_rs${XYZ}$_vf(j + 1, k, l, E_idx) - ! NOTE: unlike HLL, Bx, By, Bz are permutated by dir_idx for simpler logic - if (mhd) then - if (n == 0) then ! 1D: constant Bx; By, Bz as variables; only in x so not permutated - B%L = [Bx0, qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg), qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg + 1)] - B%R = [Bx0, qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg), qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg + 1)] - else ! 2D/3D: Bx, By, Bz as variables - B%L = [qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg + dir_idx(1) - 1), & - qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg + dir_idx(2) - 1), & - qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg + dir_idx(3) - 1)] - B%R = [qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg + dir_idx(1) - 1), & - qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg + dir_idx(2) - 1), & - qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg + dir_idx(3) - 1)] - end if + ! NOTE: unlike HLL, Bx, By, Bz are permutated by dir_idx for simpler logic + if (mhd) then + if (n == 0) then ! 1D: constant Bx; By, Bz as variables; only in x so not permutated + B%L = [Bx0, qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg), qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg + 1)] + B%R = [Bx0, qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg), qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg + 1)] + else ! 2D/3D: Bx, By, Bz as variables + B%L = [qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg + dir_idx(1) - 1), & + qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg + dir_idx(2) - 1), & + qL_prim_rs${XYZ}$_vf(j, k, l, B_idx%beg + dir_idx(3) - 1)] + B%R = [qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg + dir_idx(1) - 1), & + qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg + dir_idx(2) - 1), & + qR_prim_rs${XYZ}$_vf(j + 1, k, l, B_idx%beg + dir_idx(3) - 1)] end if + end if - ! Sum properties of all fluid components - rho%L = 0._wp; gamma%L = 0._wp; pi_inf%L = 0._wp; qv%L = 0._wp - rho%R = 0._wp; gamma%R = 0._wp; pi_inf%R = 0._wp; qv%R = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - rho%L = rho%L + alpha_rho_L(i) - gamma%L = gamma%L + alpha_L(i)*gammas(i) - pi_inf%L = pi_inf%L + alpha_L(i)*pi_infs(i) - qv%L = qv%L + alpha_rho_L(i)*qvs(i) - - rho%R = rho%R + alpha_rho_R(i) - gamma%R = gamma%R + alpha_R(i)*gammas(i) - pi_inf%R = pi_inf%R + alpha_R(i)*pi_infs(i) - qv%R = qv%R + alpha_rho_R(i)*qvs(i) - end do - - pres_mag%L = 0.5_wp*sum(B%L**2._wp) - pres_mag%R = 0.5_wp*sum(B%R**2._wp) - E%L = gamma%L*pres%L + pi_inf%L + 0.5_wp*rho%L*vel_rms%L + qv%L + pres_mag%L - E%R = gamma%R*pres%R + pi_inf%R + 0.5_wp*rho%R*vel_rms%R + qv%R + pres_mag%R ! includes magnetic energy - H_no_mag%L = (E%L + pres%L - pres_mag%L)/rho%L - H_no_mag%R = (E%R + pres%R - pres_mag%R)/rho%R ! stagnation enthalpy here excludes magnetic energy (only used to find speed of sound) - - ! (2) Compute fast wave speeds - call s_compute_speed_of_sound(pres%L, rho%L, gamma%L, pi_inf%L, H_no_mag%L, alpha_L, vel_rms%L, 0._wp, c%L, qv%L) - call s_compute_speed_of_sound(pres%R, rho%R, gamma%R, pi_inf%R, H_no_mag%R, alpha_R, vel_rms%R, 0._wp, c%R, qv%R) - call s_compute_fast_magnetosonic_speed(rho%L, c%L, B%L, norm_dir, c_fast%L, H_no_mag%L) - call s_compute_fast_magnetosonic_speed(rho%R, c%R, B%R, norm_dir, c_fast%R, H_no_mag%R) - - ! (3) Compute contact speed s_M [Miyoshi Equ. (38)] - s_L = min(vel%L(1) - c_fast%L, vel%R(1) - c_fast%R) - s_R = max(vel%R(1) + c_fast%R, vel%L(1) + c_fast%L) - - pTot_L = pres%L + pres_mag%L - pTot_R = pres%R + pres_mag%R - - s_M = (((s_R - vel%R(1))*rho%R*vel%R(1) - & - (s_L - vel%L(1))*rho%L*vel%L(1) - pTot_R + pTot_L)/ & - ((s_R - vel%R(1))*rho%R - (s_L - vel%L(1))*rho%L)) - - ! (4) Compute star state variables - rhoL_star = rho%L*(s_L - vel%L(1))/(s_L - s_M) - rhoR_star = rho%R*(s_R - vel%R(1))/(s_R - s_M) - p_star = pTot_L + rho%L*(s_L - vel%L(1))*(s_M - vel%L(1))/(s_L - s_M) - E_starL = ((s_L - vel%L(1))*E%L - pTot_L*vel%L(1) + p_star*s_M)/(s_L - s_M) - E_starR = ((s_R - vel%R(1))*E%R - pTot_R*vel%R(1) + p_star*s_M)/(s_R - s_M) - - ! (5) Compute left/right state vectors and fluxes - U_L = [rho%L, rho%L*vel%L(1:3), B%L(2:3), E%L] - U_starL = [rhoL_star, rhoL_star*s_M, rhoL_star*vel%L(2:3), B%L(2:3), E_starL] - U_R = [rho%R, rho%R*vel%R(1:3), B%R(2:3), E%R] - U_starR = [rhoR_star, rhoR_star*s_M, rhoR_star*vel%R(2:3), B%R(2:3), E_starR] - - ! Compute the left/right fluxes - F_L(1) = U_L(2) - F_L(2) = U_L(2)*vel%L(1) - B%L(1)*B%L(1) + pTot_L - F_L(3:4) = U_L(2)*vel%L(2:3) - B%L(1)*B%L(2:3) - F_L(5:6) = vel%L(1)*B%L(2:3) - vel%L(2:3)*B%L(1) - F_L(7) = (E%L + pTot_L)*vel%L(1) - B%L(1)*(vel%L(1)*B%L(1) + vel%L(2)*B%L(2) + vel%L(3)*B%L(3)) - - F_R(1) = U_R(2) - F_R(2) = U_R(2)*vel%R(1) - B%R(1)*B%R(1) + pTot_R - F_R(3:4) = U_R(2)*vel%R(2:3) - B%R(1)*B%R(2:3) - F_R(5:6) = vel%R(1)*B%R(2:3) - vel%R(2:3)*B%R(1) - F_R(7) = (E%R + pTot_R)*vel%R(1) - B%R(1)*(vel%R(1)*B%R(1) + vel%R(2)*B%R(2) + vel%R(3)*B%R(3)) - ! Compute the star flux using HLL relation - F_starL = F_L + s_L*(U_starL - U_L) - F_starR = F_R + s_R*(U_starR - U_R) - ! Compute the rotational (Alfvén) speeds - s_starL = s_M - abs(B%L(1))/sqrt(rhoL_star) - s_starR = s_M + abs(B%L(1))/sqrt(rhoR_star) - ! Compute the double–star states [Miyoshi Eqns. (59)-(62)] - sqrt_rhoL_star = sqrt(rhoL_star); sqrt_rhoR_star = sqrt(rhoR_star) - vL_star = vel%L(2); wL_star = vel%L(3) - vR_star = vel%R(2); wR_star = vel%R(3) - - ! (6) Compute the double–star states [Miyoshi Eqns. (59)-(62)] - denom_ds = sqrt_rhoL_star + sqrt_rhoR_star - sign_Bx = sign(1._wp, B%L(1)) - v_double = (sqrt_rhoL_star*vL_star + sqrt_rhoR_star*vR_star + (B%R(2) - B%L(2))*sign_Bx)/denom_ds - w_double = (sqrt_rhoL_star*wL_star + sqrt_rhoR_star*wR_star + (B%R(3) - B%L(3))*sign_Bx)/denom_ds - By_double = (sqrt_rhoL_star*B%R(2) + sqrt_rhoR_star*B%L(2) + sqrt_rhoL_star*sqrt_rhoR_star*(vR_star - vL_star)*sign_Bx)/denom_ds - Bz_double = (sqrt_rhoL_star*B%R(3) + sqrt_rhoR_star*B%L(3) + sqrt_rhoL_star*sqrt_rhoR_star*(wR_star - wL_star)*sign_Bx)/denom_ds - - E_doubleL = E_starL - sqrt_rhoL_star*((vL_star*B%L(2) + wL_star*B%L(3)) - (v_double*By_double + w_double*Bz_double))*sign_Bx - E_doubleR = E_starR + sqrt_rhoR_star*((vR_star*B%R(2) + wR_star*B%R(3)) - (v_double*By_double + w_double*Bz_double))*sign_Bx - E_double = 0.5_wp*(E_doubleL + E_doubleR) - - U_doubleL = [rhoL_star, rhoL_star*s_M, rhoL_star*v_double, rhoL_star*w_double, By_double, Bz_double, E_double] - U_doubleR = [rhoR_star, rhoR_star*s_M, rhoR_star*v_double, rhoR_star*w_double, By_double, Bz_double, E_double] - - ! (11) Choose HLLD flux based on wave-speed regions - if (0.0_wp <= s_L) then - F_hlld = F_L - else if (0.0_wp <= s_starL) then - F_hlld = F_L + s_L*(U_starL - U_L) - else if (0.0_wp <= s_M) then - F_hlld = F_starL + s_starL*(U_doubleL - U_starL) - else if (0.0_wp <= s_starR) then - F_hlld = F_starR + s_starR*(U_doubleR - U_starR) - else if (0.0_wp <= s_R) then - F_hlld = F_R + s_R*(U_starR - U_R) - else - F_hlld = F_R - end if + ! Sum properties of all fluid components + rho%L = 0._wp; gamma%L = 0._wp; pi_inf%L = 0._wp; qv%L = 0._wp + rho%R = 0._wp; gamma%R = 0._wp; pi_inf%R = 0._wp; qv%R = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + rho%L = rho%L + alpha_rho_L(i) + gamma%L = gamma%L + alpha_L(i)*gammas(i) + pi_inf%L = pi_inf%L + alpha_L(i)*pi_infs(i) + qv%L = qv%L + alpha_rho_L(i)*qvs(i) + + rho%R = rho%R + alpha_rho_R(i) + gamma%R = gamma%R + alpha_R(i)*gammas(i) + pi_inf%R = pi_inf%R + alpha_R(i)*pi_infs(i) + qv%R = qv%R + alpha_rho_R(i)*qvs(i) + end do - ! (12) Reorder and write temporary variables to the flux array - ! Mass - flux_rs${XYZ}$_vf(j, k, l, 1) = F_hlld(1) ! TODO multi-component - ! Momentum - flux_rs${XYZ}$_vf(j, k, l, [contxe + dir_idx(1), contxe + dir_idx(2), contxe + dir_idx(3)]) = F_hlld([2, 3, 4]) - ! Magnetic field - if (n == 0) then - flux_rs${XYZ}$_vf(j, k, l, [B_idx%beg, B_idx%beg + 1]) = F_hlld([5, 6]) - else - flux_rs${XYZ}$_vf(j, k, l, [B_idx%beg + dir_idx(2) - 1, B_idx%beg + dir_idx(3) - 1]) = F_hlld([5, 6]) - end if - ! Energy - flux_rs${XYZ}$_vf(j, k, l, E_idx) = F_hlld(7) - ! Partial fraction - $:GPU_LOOP(parallelism='[seq]') - do i = advxb, advxe - flux_rs${XYZ}$_vf(j, k, l, i) = 0._wp ! TODO multi-component (zero for now) - end do + pres_mag%L = 0.5_wp*sum(B%L**2._wp) + pres_mag%R = 0.5_wp*sum(B%R**2._wp) + E%L = gamma%L*pres%L + pi_inf%L + 0.5_wp*rho%L*vel_rms%L + qv%L + pres_mag%L + E%R = gamma%R*pres%R + pi_inf%R + 0.5_wp*rho%R*vel_rms%R + qv%R + pres_mag%R ! includes magnetic energy + H_no_mag%L = (E%L + pres%L - pres_mag%L)/rho%L + H_no_mag%R = (E%R + pres%R - pres_mag%R)/rho%R ! stagnation enthalpy here excludes magnetic energy (only used to find speed of sound) + + ! (2) Compute fast wave speeds + call s_compute_speed_of_sound(pres%L, rho%L, gamma%L, pi_inf%L, H_no_mag%L, alpha_L, vel_rms%L, 0._wp, c%L, qv%L) + call s_compute_speed_of_sound(pres%R, rho%R, gamma%R, pi_inf%R, H_no_mag%R, alpha_R, vel_rms%R, 0._wp, c%R, qv%R) + call s_compute_fast_magnetosonic_speed(rho%L, c%L, B%L, norm_dir, c_fast%L, H_no_mag%L) + call s_compute_fast_magnetosonic_speed(rho%R, c%R, B%R, norm_dir, c_fast%R, H_no_mag%R) + + ! (3) Compute contact speed s_M [Miyoshi Equ. (38)] + s_L = min(vel%L(1) - c_fast%L, vel%R(1) - c_fast%R) + s_R = max(vel%R(1) + c_fast%R, vel%L(1) + c_fast%L) + + pTot_L = pres%L + pres_mag%L + pTot_R = pres%R + pres_mag%R + + s_M = (((s_R - vel%R(1))*rho%R*vel%R(1) - & + (s_L - vel%L(1))*rho%L*vel%L(1) - pTot_R + pTot_L)/ & + ((s_R - vel%R(1))*rho%R - (s_L - vel%L(1))*rho%L)) + + ! (4) Compute star state variables + rhoL_star = rho%L*(s_L - vel%L(1))/(s_L - s_M) + rhoR_star = rho%R*(s_R - vel%R(1))/(s_R - s_M) + p_star = pTot_L + rho%L*(s_L - vel%L(1))*(s_M - vel%L(1))/(s_L - s_M) + E_starL = ((s_L - vel%L(1))*E%L - pTot_L*vel%L(1) + p_star*s_M)/(s_L - s_M) + E_starR = ((s_R - vel%R(1))*E%R - pTot_R*vel%R(1) + p_star*s_M)/(s_R - s_M) + + ! (5) Compute left/right state vectors and fluxes + U_L = [rho%L, rho%L*vel%L(1:3), B%L(2:3), E%L] + U_starL = [rhoL_star, rhoL_star*s_M, rhoL_star*vel%L(2:3), B%L(2:3), E_starL] + U_R = [rho%R, rho%R*vel%R(1:3), B%R(2:3), E%R] + U_starR = [rhoR_star, rhoR_star*s_M, rhoR_star*vel%R(2:3), B%R(2:3), E_starR] + + ! Compute the left/right fluxes + F_L(1) = U_L(2) + F_L(2) = U_L(2)*vel%L(1) - B%L(1)*B%L(1) + pTot_L + F_L(3:4) = U_L(2)*vel%L(2:3) - B%L(1)*B%L(2:3) + F_L(5:6) = vel%L(1)*B%L(2:3) - vel%L(2:3)*B%L(1) + F_L(7) = (E%L + pTot_L)*vel%L(1) - B%L(1)*(vel%L(1)*B%L(1) + vel%L(2)*B%L(2) + vel%L(3)*B%L(3)) + + F_R(1) = U_R(2) + F_R(2) = U_R(2)*vel%R(1) - B%R(1)*B%R(1) + pTot_R + F_R(3:4) = U_R(2)*vel%R(2:3) - B%R(1)*B%R(2:3) + F_R(5:6) = vel%R(1)*B%R(2:3) - vel%R(2:3)*B%R(1) + F_R(7) = (E%R + pTot_R)*vel%R(1) - B%R(1)*(vel%R(1)*B%R(1) + vel%R(2)*B%R(2) + vel%R(3)*B%R(3)) + ! Compute the star flux using HLL relation + F_starL = F_L + s_L*(U_starL - U_L) + F_starR = F_R + s_R*(U_starR - U_R) + ! Compute the rotational (Alfvén) speeds + s_starL = s_M - abs(B%L(1))/sqrt(rhoL_star) + s_starR = s_M + abs(B%L(1))/sqrt(rhoR_star) + ! Compute the double–star states [Miyoshi Eqns. (59)-(62)] + sqrt_rhoL_star = sqrt(rhoL_star); sqrt_rhoR_star = sqrt(rhoR_star) + vL_star = vel%L(2); wL_star = vel%L(3) + vR_star = vel%R(2); wR_star = vel%R(3) + + ! (6) Compute the double–star states [Miyoshi Eqns. (59)-(62)] + denom_ds = sqrt_rhoL_star + sqrt_rhoR_star + sign_Bx = sign(1._wp, B%L(1)) + v_double = (sqrt_rhoL_star*vL_star + sqrt_rhoR_star*vR_star + (B%R(2) - B%L(2))*sign_Bx)/denom_ds + w_double = (sqrt_rhoL_star*wL_star + sqrt_rhoR_star*wR_star + (B%R(3) - B%L(3))*sign_Bx)/denom_ds + By_double = (sqrt_rhoL_star*B%R(2) + sqrt_rhoR_star*B%L(2) + sqrt_rhoL_star*sqrt_rhoR_star*(vR_star - vL_star)*sign_Bx)/denom_ds + Bz_double = (sqrt_rhoL_star*B%R(3) + sqrt_rhoR_star*B%L(3) + sqrt_rhoL_star*sqrt_rhoR_star*(wR_star - wL_star)*sign_Bx)/denom_ds + + E_doubleL = E_starL - sqrt_rhoL_star*((vL_star*B%L(2) + wL_star*B%L(3)) - (v_double*By_double + w_double*Bz_double))*sign_Bx + E_doubleR = E_starR + sqrt_rhoR_star*((vR_star*B%R(2) + wR_star*B%R(3)) - (v_double*By_double + w_double*Bz_double))*sign_Bx + E_double = 0.5_wp*(E_doubleL + E_doubleR) + + U_doubleL = [rhoL_star, rhoL_star*s_M, rhoL_star*v_double, rhoL_star*w_double, By_double, Bz_double, E_double] + U_doubleR = [rhoR_star, rhoR_star*s_M, rhoR_star*v_double, rhoR_star*w_double, By_double, Bz_double, E_double] + + ! (11) Choose HLLD flux based on wave-speed regions + if (0.0_wp <= s_L) then + F_hlld = F_L + else if (0.0_wp <= s_starL) then + F_hlld = F_L + s_L*(U_starL - U_L) + else if (0.0_wp <= s_M) then + F_hlld = F_starL + s_starL*(U_doubleL - U_starL) + else if (0.0_wp <= s_starR) then + F_hlld = F_starR + s_starR*(U_doubleR - U_starR) + else if (0.0_wp <= s_R) then + F_hlld = F_R + s_R*(U_starR - U_R) + else + F_hlld = F_R + end if - flux_src_rs${XYZ}$_vf(j, k, l, advxb) = 0._wp + ! (12) Reorder and write temporary variables to the flux array + ! Mass + flux_rs${XYZ}$_vf(j, k, l, 1) = F_hlld(1) ! TODO multi-component + ! Momentum + flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(1)) = F_hlld(2) + flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(2)) = F_hlld(3) + flux_rs${XYZ}$_vf(j, k, l, contxe + dir_idx(3)) = F_hlld(4) + ! Magnetic field + if (n == 0) then + flux_rs${XYZ}$_vf(j, k, l, B_idx%beg) = F_hlld(5) + flux_rs${XYZ}$_vf(j, k, l, B_idx%beg + 1) = F_hlld(6) + else + flux_rs${XYZ}$_vf(j, k, l, B_idx%beg + dir_idx(2) - 1) = F_hlld(5) + flux_rs${XYZ}$_vf(j, k, l, B_idx%beg + dir_idx(3) - 1) = F_hlld(6) + end if + ! Energy + flux_rs${XYZ}$_vf(j, k, l, E_idx) = F_hlld(7) + ! Partial fraction + $:GPU_LOOP(parallelism='[seq]') + do i = advxb, advxe + flux_rs${XYZ}$_vf(j, k, l, i) = 0._wp ! TODO multi-component (zero for now) end do + + flux_src_rs${XYZ}$_vf(j, k, l, advxb) = 0._wp end do end do - $:END_GPU_PARALLEL_LOOP() - #:endblock UNDEF_AMD + end do + $:END_GPU_PARALLEL_LOOP() end if #:endfor @@ -4059,7 +4108,7 @@ contains end do $:END_GPU_PARALLEL_LOOP() - if (viscous) then + if (viscous .or. dummy) then $:GPU_PARALLEL_LOOP(collapse=3) do i = momxb, momxe do l = isz%beg, isz%end @@ -4118,7 +4167,7 @@ contains end do $:END_GPU_PARALLEL_LOOP() - if (viscous) then + if (viscous .or. dummy) then $:GPU_PARALLEL_LOOP(collapse=3) do i = momxb, momxe @@ -4181,7 +4230,7 @@ contains end do $:END_GPU_PARALLEL_LOOP() - if (viscous) then + if (viscous .or. dummy) then $:GPU_PARALLEL_LOOP(collapse=3) do i = momxb, momxe @@ -4235,7 +4284,7 @@ contains end do $:END_GPU_PARALLEL_LOOP() - if (viscous) then + if (viscous .or. dummy) then $:GPU_PARALLEL_LOOP(collapse=3) do i = momxb, momxe @@ -4292,7 +4341,7 @@ contains end do $:END_GPU_PARALLEL_LOOP() - if (viscous) then + if (viscous .or. dummy) then $:GPU_PARALLEL_LOOP(collapse=3) do i = momxb, momxe do k = isy%beg, isy%end @@ -4340,7 +4389,7 @@ contains end do $:END_GPU_PARALLEL_LOOP() - if (viscous) then + if (viscous .or. dummy) then $:GPU_PARALLEL_LOOP(collapse=3) do i = momxb, momxe do k = isy%beg, isy%end @@ -4414,7 +4463,7 @@ contains if (norm_dir == 1) then - if (viscous .or. (surface_tension)) then + if (viscous .or. (surface_tension) .or. dummy) then $:GPU_PARALLEL_LOOP(collapse=4) do i = momxb, E_idx @@ -4462,7 +4511,7 @@ contains ! Reshaping Inputted Data in y-direction elseif (norm_dir == 2) then - if (viscous .or. (surface_tension)) then + if (viscous .or. (surface_tension) .or. dummy) then $:GPU_PARALLEL_LOOP(collapse=4) do i = momxb, E_idx do l = is3%beg, is3%end @@ -4509,7 +4558,7 @@ contains ! Reshaping Inputted Data in z-direction else - if (viscous .or. (surface_tension)) then + if (viscous .or. (surface_tension) .or. dummy) then $:GPU_PARALLEL_LOOP(collapse=4) do i = momxb, E_idx do j = is1%beg, is1%end @@ -4658,12 +4707,14 @@ contains end select ! Divergence in cylindrical coordinates (vx=vz_cyl, vy=vr_cyl, vz=vtheta_cyl) + #:if not MFC_CASE_OPTIMIZATION or num_dims > 1 divergence_cyl = avg_dvdx_int(1) + avg_dvdy_int(2) + avg_v_int(2)/r_eff if (num_dims > 2) then #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 divergence_cyl = divergence_cyl + avg_dvdz_int(3)/r_eff #:endif end if + #:endif stress_vector_shear = 0.0_wp stress_normal_bulk = 0.0_wp @@ -4675,7 +4726,9 @@ contains case (1) ! X-face (axial normal, z_cyl) stress_vector_shear(1) = (2.0_wp*avg_dvdx_int(1))/Re_s + div_v_term_const if (num_dims > 1) then + #:if not MFC_CASE_OPTIMIZATION or num_dims > 1 stress_vector_shear(2) = (avg_dvdy_int(1) + avg_dvdx_int(2))/Re_s + #:endif end if if (num_dims > 2) then #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 @@ -4684,6 +4737,7 @@ contains end if case (2) ! Y-face (radial normal, r_cyl) if (num_dims > 1) then + #:if not MFC_CASE_OPTIMIZATION or num_dims > 1 stress_vector_shear(1) = (avg_dvdy_int(1) + avg_dvdx_int(2))/Re_s stress_vector_shear(2) = (2.0_wp*avg_dvdy_int(2))/Re_s + div_v_term_const if (num_dims > 2) then @@ -4691,6 +4745,7 @@ contains stress_vector_shear(3) = (avg_dvdz_int(2)/r_eff - avg_v_int(3)/r_eff + avg_dvdy_int(3))/Re_s #:endif end if + #:endif else stress_vector_shear(1) = (2.0_wp*avg_dvdx_int(1))/Re_s + div_v_term_const end if @@ -4790,8 +4845,10 @@ contains vel_grad_avg(vel_comp_idx, 1) = 0.5_wp*(dvelL_dx_vf(vel_comp_idx)%sf(j_loop, k_loop, l_loop) + & dvelR_dx_vf(vel_comp_idx)%sf(idx_right_phys(1), idx_right_phys(2), idx_right_phys(3))) if (num_dims > 1) then + #:if not MFC_CASE_OPTIMIZATION or num_dims > 1 vel_grad_avg(vel_comp_idx, 2) = 0.5_wp*(dvelL_dy_vf(vel_comp_idx)%sf(j_loop, k_loop, l_loop) + & dvelR_dy_vf(vel_comp_idx)%sf(idx_right_phys(1), idx_right_phys(2), idx_right_phys(3))) + #:endif end if if (num_dims > 2) then #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 diff --git a/src/simulation/m_sim_helpers.fpp b/src/simulation/m_sim_helpers.fpp index 0aa632d3ef..873a614cbb 100644 --- a/src/simulation/m_sim_helpers.fpp +++ b/src/simulation/m_sim_helpers.fpp @@ -97,14 +97,22 @@ contains & cray_inline=True) type(scalar_field), intent(in), dimension(sys_size) :: q_prim_vf +#:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), intent(inout), dimension(2) :: alpha + real(wp), intent(inout), dimension(3) :: vel +#:else real(wp), intent(inout), dimension(num_fluids) :: alpha real(wp), intent(inout), dimension(num_vels) :: vel +#:endif real(wp), intent(inout) :: rho, gamma, pi_inf, vel_sum, H, pres real(wp), intent(out) :: qv integer, intent(in) :: j, k, l real(wp), dimension(2), intent(inout) :: Re - +#:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(2) :: alpha_rho, Gs +#:else real(wp), dimension(num_fluids) :: alpha_rho, Gs +#:endif real(wp) :: E, G_local integer :: i diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp index e6f311cf69..86284622c6 100644 --- a/src/simulation/m_start_up.fpp +++ b/src/simulation/m_start_up.fpp @@ -1373,9 +1373,10 @@ contains ! Computation of parameters, allocation of memory, association of pointers, ! and/or execution of any other tasks that are needed to properly configure ! the modules. The preparations below DO DEPEND on the grid being complete. - if (igr) then + if (igr .or. dummy) then call s_initialize_igr_module() - else + end if + if(.not. igr .or. dummy) then if (recon_type == WENO_TYPE) then call s_initialize_weno_module() elseif (recon_type == MUSCL_TYPE) then @@ -1534,14 +1535,14 @@ contains #:if not MFC_CASE_OPTIMIZATION $:GPU_UPDATE(device='[igr,nb,igr_order]') #:endif - #:block DEF_AMD + #:if USING_AMD block use m_thermochem, only: molecular_weights use m_chemistry, only: molecular_weights_nonparameter molecular_weights_nonparameter(:) = molecular_weights(:) $:GPU_UPDATE(device='[molecular_weights_nonparameter]') end block - #:endblock + #:endif end subroutine s_initialize_gpu_vars diff --git a/src/simulation/m_surface_tension.fpp b/src/simulation/m_surface_tension.fpp index 86e187dca1..d51b67f659 100644 --- a/src/simulation/m_surface_tension.fpp +++ b/src/simulation/m_surface_tension.fpp @@ -134,7 +134,7 @@ contains $:END_GPU_PARALLEL_LOOP() elseif (id == 2) then - + #:if not MFC_CASE_OPTIMIZATION or num_dims > 1 $:GPU_PARALLEL_LOOP(collapse=3, private='[Omega, w1L, w2L, w3L, w1R, w2R, w3R, w1, w2, w3, normWL, normWR, normW]') do l = isz%beg, isz%end do k = isy%beg, isy%end @@ -178,6 +178,7 @@ contains end do end do $:END_GPU_PARALLEL_LOOP() + #:endif elseif (id == 3) then #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 @@ -324,7 +325,7 @@ contains integer :: i, j, k, l #:for SCHEME, TYPE in [('weno', 'WENO_TYPE'),('muscl', 'MUSCL_TYPE')] - if (recon_type == ${TYPE}$) then + if (recon_type == ${TYPE}$ .or. dummy) then ! Reconstruction in s1-direction if (norm_dir == 1) then @@ -345,49 +346,49 @@ contains end if $:GPU_UPDATE(device='[is1,is2,is3,iv]') + end if + #:endfor - if (recon_dir == 1) then - $:GPU_PARALLEL_LOOP(collapse=4) - do i = iv%beg, iv%end - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end - vL_x(j, k, l, i) = v_vf(i)%sf(j, k, l) - vR_x(j, k, l, i) = v_vf(i)%sf(j, k, l) - end do - end do + if (recon_dir == 1) then + $:GPU_PARALLEL_LOOP(collapse=4) + do i = iv%beg, iv%end + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end + vL_x(j, k, l, i) = v_vf(i)%sf(j, k, l) + vR_x(j, k, l, i) = v_vf(i)%sf(j, k, l) end do end do - $:END_GPU_PARALLEL_LOOP() - else if (recon_dir == 2) then - $:GPU_PARALLEL_LOOP(collapse=4) - do i = iv%beg, iv%end - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end - vL_y(j, k, l, i) = v_vf(i)%sf(k, j, l) - vR_y(j, k, l, i) = v_vf(i)%sf(k, j, l) - end do - end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + else if (recon_dir == 2) then + $:GPU_PARALLEL_LOOP(collapse=4) + do i = iv%beg, iv%end + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end + vL_y(j, k, l, i) = v_vf(i)%sf(k, j, l) + vR_y(j, k, l, i) = v_vf(i)%sf(k, j, l) end do end do - $:END_GPU_PARALLEL_LOOP() - else if (recon_dir == 3) then - $:GPU_PARALLEL_LOOP(collapse=4) - do i = iv%beg, iv%end - do l = is3%beg, is3%end - do k = is2%beg, is2%end - do j = is1%beg, is1%end - vL_z(j, k, l, i) = v_vf(i)%sf(l, k, j) - vR_z(j, k, l, i) = v_vf(i)%sf(l, k, j) - end do - end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + else if (recon_dir == 3) then + $:GPU_PARALLEL_LOOP(collapse=4) + do i = iv%beg, iv%end + do l = is3%beg, is3%end + do k = is2%beg, is2%end + do j = is1%beg, is1%end + vL_z(j, k, l, i) = v_vf(i)%sf(l, k, j) + vR_z(j, k, l, i) = v_vf(i)%sf(l, k, j) end do end do - $:END_GPU_PARALLEL_LOOP() - end if - end if - #:endfor + end do + end do + $:END_GPU_PARALLEL_LOOP() + end if end subroutine s_reconstruct_cell_boundary_values_capillary diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index 8b18381ffb..cd4be44028 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -526,9 +526,10 @@ contains if (s == 1) then if (run_time_info) then - if (igr) then + if (igr .or. dummy) then call s_write_run_time_information(q_cons_ts(1)%vf, t_step) - else + end if + if(.not. igr .or. dummy) then call s_write_run_time_information(q_prim_vf, t_step) end if end if @@ -732,7 +733,7 @@ contains real(wp) :: dt_local integer :: j, k, l !< Generic loop iterators - if (.not. igr) then + if (.not. igr .or. dummy) then call s_convert_conservative_to_primitive_variables( & q_cons_ts(1)%vf, & q_T_sf, & diff --git a/src/simulation/m_viscous.fpp b/src/simulation/m_viscous.fpp index c6b053bdaf..f3fc59ee43 100644 --- a/src/simulation/m_viscous.fpp +++ b/src/simulation/m_viscous.fpp @@ -95,216 +95,220 @@ contains end do $:END_GPU_PARALLEL_LOOP() - if (shear_stress) then ! Shear stresses - $:GPU_PARALLEL_LOOP(collapse=3, private='[alpha_visc, alpha_rho_visc, Re_visc, tau_Re]') - do l = is3_viscous%beg, is3_viscous%end - do k = -1, 1 - do j = is1_viscous%beg, is1_viscous%end + #:if not MFC_CASE_OPTIMIZATION or num_dims > 1 + if (shear_stress) then ! Shear stresses + $:GPU_PARALLEL_LOOP(collapse=3, private='[alpha_visc, alpha_rho_visc, Re_visc, tau_Re]') + do l = is3_viscous%beg, is3_viscous%end + do k = -1, 1 + do j = is1_viscous%beg, is1_viscous%end - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_visc(i) = q_prim_vf(i)%sf(j, k, l) - if (bubbles_euler .and. num_fluids == 1) then - alpha_visc(i) = 1._wp - q_prim_vf(E_idx + i)%sf(j, k, l) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_rho_visc(i) = q_prim_vf(i)%sf(j, k, l) + if (bubbles_euler .and. num_fluids == 1) then + alpha_visc(i) = 1._wp - q_prim_vf(E_idx + i)%sf(j, k, l) + else + alpha_visc(i) = q_prim_vf(E_idx + i)%sf(j, k, l) + end if + end do + + if (bubbles_euler) then + rho_visc = 0._wp + gamma_visc = 0._wp + pi_inf_visc = 0._wp + + if (mpp_lim .and. (model_eqns == 2) .and. (num_fluids > 2)) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + rho_visc = rho_visc + alpha_rho_visc(i) + gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) + pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) + end do + else if ((model_eqns == 2) .and. (num_fluids > 2)) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids - 1 + rho_visc = rho_visc + alpha_rho_visc(i) + gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) + pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) + end do + else + rho_visc = alpha_rho_visc(1) + gamma_visc = gammas(1) + pi_inf_visc = pi_infs(1) + end if else - alpha_visc(i) = q_prim_vf(E_idx + i)%sf(j, k, l) - end if - end do + rho_visc = 0._wp + gamma_visc = 0._wp + pi_inf_visc = 0._wp - if (bubbles_euler) then - rho_visc = 0._wp - gamma_visc = 0._wp - pi_inf_visc = 0._wp + alpha_visc_sum = 0._wp + + if (mpp_lim) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_rho_visc(i) = max(0._wp, alpha_rho_visc(i)) + alpha_visc(i) = min(max(0._wp, alpha_visc(i)), 1._wp) + alpha_visc_sum = alpha_visc_sum + alpha_visc(i) + end do + + alpha_visc = alpha_visc/max(alpha_visc_sum, sgm_eps) + + end if - if (mpp_lim .and. (model_eqns == 2) .and. (num_fluids > 2)) then $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids rho_visc = rho_visc + alpha_rho_visc(i) gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) end do - else if ((model_eqns == 2) .and. (num_fluids > 2)) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - rho_visc = rho_visc + alpha_rho_visc(i) - gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) - pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) - end do - else - rho_visc = alpha_rho_visc(1) - gamma_visc = gammas(1) - pi_inf_visc = pi_infs(1) - end if - else - rho_visc = 0._wp - gamma_visc = 0._wp - pi_inf_visc = 0._wp - alpha_visc_sum = 0._wp + if (viscous) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, 2 + Re_visc(i) = dflt_real - if (mpp_lim) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_visc(i) = max(0._wp, alpha_rho_visc(i)) - alpha_visc(i) = min(max(0._wp, alpha_visc(i)), 1._wp) - alpha_visc_sum = alpha_visc_sum + alpha_visc(i) - end do + if (Re_size(i) > 0) Re_visc(i) = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do q = 1, Re_size(i) + Re_visc(i) = alpha_visc(Re_idx(i, q))/Res_viscous(i, q) & + + Re_visc(i) + end do - alpha_visc = alpha_visc/max(alpha_visc_sum, sgm_eps) + Re_visc(i) = 1._wp/max(Re_visc(i), sgm_eps) + end do + end if end if + tau_Re(2, 1) = (grad_y_vf(1)%sf(j, k, l) + & + grad_x_vf(2)%sf(j, k, l))/ & + Re_visc(1) + + tau_Re(2, 2) = (4._wp*grad_y_vf(2)%sf(j, k, l) & + - 2._wp*grad_x_vf(1)%sf(j, k, l) & + - 2._wp*q_prim_vf(momxb + 1)%sf(j, k, l)/y_cc(k))/ & + (3._wp*Re_visc(1)) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, 2 + tau_Re_vf(contxe + i)%sf(j, k, l) = & + tau_Re_vf(contxe + i)%sf(j, k, l) - & + tau_Re(2, i) + + tau_Re_vf(E_idx)%sf(j, k, l) = & + tau_Re_vf(E_idx)%sf(j, k, l) - & + q_prim_vf(contxe + i)%sf(j, k, l)*tau_Re(2, i) + end do + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + end if + #:endif + + #:if not MFC_CASE_OPTIMIZATION or num_dims > 1 + if (bulk_stress) then ! Bulk stresses + $:GPU_PARALLEL_LOOP(collapse=3, private='[alpha_visc, alpha_rho_visc, Re_visc, tau_Re]') + do l = is3_viscous%beg, is3_viscous%end + do k = -1, 1 + do j = is1_viscous%beg, is1_viscous%end + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - rho_visc = rho_visc + alpha_rho_visc(i) - gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) - pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) + alpha_rho_visc(i) = q_prim_vf(i)%sf(j, k, l) + if (bubbles_euler .and. num_fluids == 1) then + alpha_visc(i) = 1._wp - q_prim_vf(E_idx + i)%sf(j, k, l) + else + alpha_visc(i) = q_prim_vf(E_idx + i)%sf(j, k, l) + end if end do - if (viscous) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, 2 - Re_visc(i) = dflt_real + if (bubbles_euler) then + rho_visc = 0._wp + gamma_visc = 0._wp + pi_inf_visc = 0._wp - if (Re_size(i) > 0) Re_visc(i) = 0._wp + if (mpp_lim .and. (model_eqns == 2) .and. (num_fluids > 2)) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + rho_visc = rho_visc + alpha_rho_visc(i) + gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) + pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) + end do + else if ((model_eqns == 2) .and. (num_fluids > 2)) then $:GPU_LOOP(parallelism='[seq]') - do q = 1, Re_size(i) - Re_visc(i) = alpha_visc(Re_idx(i, q))/Res_viscous(i, q) & - + Re_visc(i) + do i = 1, num_fluids - 1 + rho_visc = rho_visc + alpha_rho_visc(i) + gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) + pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) end do + else + rho_visc = alpha_rho_visc(1) + gamma_visc = gammas(1) + pi_inf_visc = pi_infs(1) + end if + else + rho_visc = 0._wp + gamma_visc = 0._wp + pi_inf_visc = 0._wp - Re_visc(i) = 1._wp/max(Re_visc(i), sgm_eps) - - end do - end if - end if - - tau_Re(2, 1) = (grad_y_vf(1)%sf(j, k, l) + & - grad_x_vf(2)%sf(j, k, l))/ & - Re_visc(1) - - tau_Re(2, 2) = (4._wp*grad_y_vf(2)%sf(j, k, l) & - - 2._wp*grad_x_vf(1)%sf(j, k, l) & - - 2._wp*q_prim_vf(momxb + 1)%sf(j, k, l)/y_cc(k))/ & - (3._wp*Re_visc(1)) - $:GPU_LOOP(parallelism='[seq]') - do i = 1, 2 - tau_Re_vf(contxe + i)%sf(j, k, l) = & - tau_Re_vf(contxe + i)%sf(j, k, l) - & - tau_Re(2, i) - - tau_Re_vf(E_idx)%sf(j, k, l) = & - tau_Re_vf(E_idx)%sf(j, k, l) - & - q_prim_vf(contxe + i)%sf(j, k, l)*tau_Re(2, i) - end do - end do - end do - end do - $:END_GPU_PARALLEL_LOOP() - end if + alpha_visc_sum = 0._wp - if (bulk_stress) then ! Bulk stresses - $:GPU_PARALLEL_LOOP(collapse=3, private='[alpha_visc, alpha_rho_visc, Re_visc, tau_Re]') - do l = is3_viscous%beg, is3_viscous%end - do k = -1, 1 - do j = is1_viscous%beg, is1_viscous%end + if (mpp_lim) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_rho_visc(i) = max(0._wp, alpha_rho_visc(i)) + alpha_visc(i) = min(max(0._wp, alpha_visc(i)), 1._wp) + alpha_visc_sum = alpha_visc_sum + alpha_visc(i) + end do - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_visc(i) = q_prim_vf(i)%sf(j, k, l) - if (bubbles_euler .and. num_fluids == 1) then - alpha_visc(i) = 1._wp - q_prim_vf(E_idx + i)%sf(j, k, l) - else - alpha_visc(i) = q_prim_vf(E_idx + i)%sf(j, k, l) - end if - end do + alpha_visc = alpha_visc/max(alpha_visc_sum, sgm_eps) - if (bubbles_euler) then - rho_visc = 0._wp - gamma_visc = 0._wp - pi_inf_visc = 0._wp + end if - if (mpp_lim .and. (model_eqns == 2) .and. (num_fluids > 2)) then $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids rho_visc = rho_visc + alpha_rho_visc(i) gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) end do - else if ((model_eqns == 2) .and. (num_fluids > 2)) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - rho_visc = rho_visc + alpha_rho_visc(i) - gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) - pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) - end do - else - rho_visc = alpha_rho_visc(1) - gamma_visc = gammas(1) - pi_inf_visc = pi_infs(1) - end if - else - rho_visc = 0._wp - gamma_visc = 0._wp - pi_inf_visc = 0._wp - - alpha_visc_sum = 0._wp - - if (mpp_lim) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_visc(i) = max(0._wp, alpha_rho_visc(i)) - alpha_visc(i) = min(max(0._wp, alpha_visc(i)), 1._wp) - alpha_visc_sum = alpha_visc_sum + alpha_visc(i) - end do - alpha_visc = alpha_visc/max(alpha_visc_sum, sgm_eps) - - end if + if (viscous) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, 2 + Re_visc(i) = dflt_real - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - rho_visc = rho_visc + alpha_rho_visc(i) - gamma_visc = gamma_visc + alpha_visc(i)*gammas(i) - pi_inf_visc = pi_inf_visc + alpha_visc(i)*pi_infs(i) - end do + if (Re_size(i) > 0) Re_visc(i) = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do q = 1, Re_size(i) + Re_visc(i) = alpha_visc(Re_idx(i, q))/Res_viscous(i, q) & + + Re_visc(i) + end do - if (viscous) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, 2 - Re_visc(i) = dflt_real + Re_visc(i) = 1._wp/max(Re_visc(i), sgm_eps) - if (Re_size(i) > 0) Re_visc(i) = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do q = 1, Re_size(i) - Re_visc(i) = alpha_visc(Re_idx(i, q))/Res_viscous(i, q) & - + Re_visc(i) end do - - Re_visc(i) = 1._wp/max(Re_visc(i), sgm_eps) - - end do + end if end if - end if - tau_Re(2, 2) = (grad_x_vf(1)%sf(j, k, l) + & - grad_y_vf(2)%sf(j, k, l) + & - q_prim_vf(momxb + 1)%sf(j, k, l)/y_cc(k))/ & - Re_visc(2) + tau_Re(2, 2) = (grad_x_vf(1)%sf(j, k, l) + & + grad_y_vf(2)%sf(j, k, l) + & + q_prim_vf(momxb + 1)%sf(j, k, l)/y_cc(k))/ & + Re_visc(2) - tau_Re_vf(momxb + 1)%sf(j, k, l) = & - tau_Re_vf(momxb + 1)%sf(j, k, l) - & - tau_Re(2, 2) + tau_Re_vf(momxb + 1)%sf(j, k, l) = & + tau_Re_vf(momxb + 1)%sf(j, k, l) - & + tau_Re(2, 2) - tau_Re_vf(E_idx)%sf(j, k, l) = & - tau_Re_vf(E_idx)%sf(j, k, l) - & - q_prim_vf(momxb + 1)%sf(j, k, l)*tau_Re(2, 2) + tau_Re_vf(E_idx)%sf(j, k, l) = & + tau_Re_vf(E_idx)%sf(j, k, l) - & + q_prim_vf(momxb + 1)%sf(j, k, l)*tau_Re(2, 2) + end do end do end do - end do - $:END_GPU_PARALLEL_LOOP() - end if + $:END_GPU_PARALLEL_LOOP() + end if + #:endif if (p == 0) return #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 @@ -636,120 +640,123 @@ contains if (n > 0) then - $:GPU_PARALLEL_LOOP(collapse=3) - do l = is3_viscous%beg, is3_viscous%end - do j = is2_viscous%beg + 1, is2_viscous%end - do k = is1_viscous%beg, is1_viscous%end - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end - dqL_prim_dy_n(2)%vf(i)%sf(k, j, l) = & - (q_prim_qp%vf(i)%sf(k, j, l) - & - q_prim_qp%vf(i)%sf(k, j - 1, l))/ & - (y_cc(j) - y_cc(j - 1)) + #:if not MFC_CASE_OPTIMIZATION or num_dims > 1 + $:GPU_PARALLEL_LOOP(collapse=3) + do l = is3_viscous%beg, is3_viscous%end + do j = is2_viscous%beg + 1, is2_viscous%end + do k = is1_viscous%beg, is1_viscous%end + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end + dqL_prim_dy_n(2)%vf(i)%sf(k, j, l) = & + (q_prim_qp%vf(i)%sf(k, j, l) - & + q_prim_qp%vf(i)%sf(k, j - 1, l))/ & + (y_cc(j) - y_cc(j - 1)) + end do end do end do end do - end do - $:END_GPU_PARALLEL_LOOP() + $:END_GPU_PARALLEL_LOOP() - $:GPU_PARALLEL_LOOP(collapse=3) - do l = is3_viscous%beg, is3_viscous%end - do j = is2_viscous%beg, is2_viscous%end - 1 - do k = is1_viscous%beg, is1_viscous%end - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end - dqR_prim_dy_n(2)%vf(i)%sf(k, j, l) = & - (q_prim_qp%vf(i)%sf(k, j + 1, l) - & - q_prim_qp%vf(i)%sf(k, j, l))/ & - (y_cc(j + 1) - y_cc(j)) + $:GPU_PARALLEL_LOOP(collapse=3) + do l = is3_viscous%beg, is3_viscous%end + do j = is2_viscous%beg, is2_viscous%end - 1 + do k = is1_viscous%beg, is1_viscous%end + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end + dqR_prim_dy_n(2)%vf(i)%sf(k, j, l) = & + (q_prim_qp%vf(i)%sf(k, j + 1, l) - & + q_prim_qp%vf(i)%sf(k, j, l))/ & + (y_cc(j + 1) - y_cc(j)) + end do end do end do end do - end do - $:END_GPU_PARALLEL_LOOP() + $:END_GPU_PARALLEL_LOOP() - $:GPU_PARALLEL_LOOP(collapse=3) - do l = is3_viscous%beg, is3_viscous%end - do j = is2_viscous%beg + 1, is2_viscous%end - do k = is1_viscous%beg + 1, is1_viscous%end - 1 - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end - dqL_prim_dx_n(2)%vf(i)%sf(k, j, l) = & - (dqL_prim_dx_n(1)%vf(i)%sf(k, j, l) + & - dqR_prim_dx_n(1)%vf(i)%sf(k, j, l) + & - dqL_prim_dx_n(1)%vf(i)%sf(k, j - 1, l) + & - dqR_prim_dx_n(1)%vf(i)%sf(k, j - 1, l)) - - dqL_prim_dx_n(2)%vf(i)%sf(k, j, l) = 25.e-2_wp* & - dqL_prim_dx_n(2)%vf(i)%sf(k, j, l) + $:GPU_PARALLEL_LOOP(collapse=3) + do l = is3_viscous%beg, is3_viscous%end + do j = is2_viscous%beg + 1, is2_viscous%end + do k = is1_viscous%beg + 1, is1_viscous%end - 1 + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end + dqL_prim_dx_n(2)%vf(i)%sf(k, j, l) = & + (dqL_prim_dx_n(1)%vf(i)%sf(k, j, l) + & + dqR_prim_dx_n(1)%vf(i)%sf(k, j, l) + & + dqL_prim_dx_n(1)%vf(i)%sf(k, j - 1, l) + & + dqR_prim_dx_n(1)%vf(i)%sf(k, j - 1, l)) + + dqL_prim_dx_n(2)%vf(i)%sf(k, j, l) = 25.e-2_wp* & + dqL_prim_dx_n(2)%vf(i)%sf(k, j, l) + end do end do end do end do - end do - $:END_GPU_PARALLEL_LOOP() + $:END_GPU_PARALLEL_LOOP() - $:GPU_PARALLEL_LOOP(collapse=3) - do l = is3_viscous%beg, is3_viscous%end - do j = is2_viscous%beg, is2_viscous%end - 1 - do k = is1_viscous%beg + 1, is1_viscous%end - 1 - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end - dqR_prim_dx_n(2)%vf(i)%sf(k, j, l) = & - (dqL_prim_dx_n(1)%vf(i)%sf(k, j + 1, l) + & - dqR_prim_dx_n(1)%vf(i)%sf(k, j + 1, l) + & - dqL_prim_dx_n(1)%vf(i)%sf(k, j, l) + & - dqR_prim_dx_n(1)%vf(i)%sf(k, j, l)) + $:GPU_PARALLEL_LOOP(collapse=3) + do l = is3_viscous%beg, is3_viscous%end + do j = is2_viscous%beg, is2_viscous%end - 1 + do k = is1_viscous%beg + 1, is1_viscous%end - 1 + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end + dqR_prim_dx_n(2)%vf(i)%sf(k, j, l) = & + (dqL_prim_dx_n(1)%vf(i)%sf(k, j + 1, l) + & + dqR_prim_dx_n(1)%vf(i)%sf(k, j + 1, l) + & + dqL_prim_dx_n(1)%vf(i)%sf(k, j, l) + & + dqR_prim_dx_n(1)%vf(i)%sf(k, j, l)) - dqR_prim_dx_n(2)%vf(i)%sf(k, j, l) = 25.e-2_wp* & - dqR_prim_dx_n(2)%vf(i)%sf(k, j, l) + dqR_prim_dx_n(2)%vf(i)%sf(k, j, l) = 25.e-2_wp* & + dqR_prim_dx_n(2)%vf(i)%sf(k, j, l) + end do end do end do end do - end do - $:END_GPU_PARALLEL_LOOP() + $:END_GPU_PARALLEL_LOOP() - $:GPU_PARALLEL_LOOP(collapse=3) - do l = is3_viscous%beg, is3_viscous%end - do k = is2_viscous%beg + 1, is2_viscous%end - 1 - do j = is1_viscous%beg + 1, is1_viscous%end - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end - dqL_prim_dy_n(1)%vf(i)%sf(j, k, l) = & - (dqL_prim_dy_n(2)%vf(i)%sf(j, k, l) + & - dqR_prim_dy_n(2)%vf(i)%sf(j, k, l) + & - dqL_prim_dy_n(2)%vf(i)%sf(j - 1, k, l) + & - dqR_prim_dy_n(2)%vf(i)%sf(j - 1, k, l)) + $:GPU_PARALLEL_LOOP(collapse=3) + do l = is3_viscous%beg, is3_viscous%end + do k = is2_viscous%beg + 1, is2_viscous%end - 1 + do j = is1_viscous%beg + 1, is1_viscous%end + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end + dqL_prim_dy_n(1)%vf(i)%sf(j, k, l) = & + (dqL_prim_dy_n(2)%vf(i)%sf(j, k, l) + & + dqR_prim_dy_n(2)%vf(i)%sf(j, k, l) + & + dqL_prim_dy_n(2)%vf(i)%sf(j - 1, k, l) + & + dqR_prim_dy_n(2)%vf(i)%sf(j - 1, k, l)) - dqL_prim_dy_n(1)%vf(i)%sf(j, k, l) = 25.e-2_wp* & - dqL_prim_dy_n(1)%vf(i)%sf(j, k, l) + dqL_prim_dy_n(1)%vf(i)%sf(j, k, l) = 25.e-2_wp* & + dqL_prim_dy_n(1)%vf(i)%sf(j, k, l) + end do end do end do end do - end do - $:END_GPU_PARALLEL_LOOP() + $:END_GPU_PARALLEL_LOOP() - $:GPU_PARALLEL_LOOP(collapse=3) - do l = is3_viscous%beg, is3_viscous%end - do k = is2_viscous%beg + 1, is2_viscous%end - 1 - do j = is1_viscous%beg, is1_viscous%end - 1 - $:GPU_LOOP(parallelism='[seq]') - do i = iv%beg, iv%end - dqR_prim_dy_n(1)%vf(i)%sf(j, k, l) = & - (dqL_prim_dy_n(2)%vf(i)%sf(j + 1, k, l) + & - dqR_prim_dy_n(2)%vf(i)%sf(j + 1, k, l) + & - dqL_prim_dy_n(2)%vf(i)%sf(j, k, l) + & - dqR_prim_dy_n(2)%vf(i)%sf(j, k, l)) + $:GPU_PARALLEL_LOOP(collapse=3) + do l = is3_viscous%beg, is3_viscous%end + do k = is2_viscous%beg + 1, is2_viscous%end - 1 + do j = is1_viscous%beg, is1_viscous%end - 1 + $:GPU_LOOP(parallelism='[seq]') + do i = iv%beg, iv%end + dqR_prim_dy_n(1)%vf(i)%sf(j, k, l) = & + (dqL_prim_dy_n(2)%vf(i)%sf(j + 1, k, l) + & + dqR_prim_dy_n(2)%vf(i)%sf(j + 1, k, l) + & + dqL_prim_dy_n(2)%vf(i)%sf(j, k, l) + & + dqR_prim_dy_n(2)%vf(i)%sf(j, k, l)) - dqR_prim_dy_n(1)%vf(i)%sf(j, k, l) = 25.e-2_wp* & - dqR_prim_dy_n(1)%vf(i)%sf(j, k, l) + dqR_prim_dy_n(1)%vf(i)%sf(j, k, l) = 25.e-2_wp* & + dqR_prim_dy_n(1)%vf(i)%sf(j, k, l) + end do end do end do end do - end do - $:END_GPU_PARALLEL_LOOP() + $:END_GPU_PARALLEL_LOOP() + + #:endif if (p > 0) then #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 @@ -1009,7 +1016,7 @@ contains integer :: i, j, k, l #:for SCHEME, TYPE in [('weno','WENO_TYPE'), ('muscl','MUSCL_TYPE')] - if (recon_type == ${TYPE}$) then + if (recon_type == ${TYPE}$ .or. dummy) then ! Reconstruction in s1-direction if (norm_dir == 1) then @@ -1048,53 +1055,53 @@ contains recon_dir, & is1_viscous, is2_viscous, is3_viscous) end if + end if + #:endfor - if (viscous) then - if (weno_Re_flux) then - if (norm_dir == 2) then - $:GPU_PARALLEL_LOOP(collapse=4) - do i = iv%beg, iv%end - do l = is3_viscous%beg, is3_viscous%end - do j = is1_viscous%beg, is1_viscous%end - do k = is2_viscous%beg, is2_viscous%end - vL_prim_vf(i)%sf(k, j, l) = vL_y(j, k, l, i) - vR_prim_vf(i)%sf(k, j, l) = vR_y(j, k, l, i) - end do - end do + if (viscous .or. dummy) then + if (weno_Re_flux) then + if (norm_dir == 2) then + $:GPU_PARALLEL_LOOP(collapse=4) + do i = iv%beg, iv%end + do l = is3_viscous%beg, is3_viscous%end + do j = is1_viscous%beg, is1_viscous%end + do k = is2_viscous%beg, is2_viscous%end + vL_prim_vf(i)%sf(k, j, l) = vL_y(j, k, l, i) + vR_prim_vf(i)%sf(k, j, l) = vR_y(j, k, l, i) end do end do - $:END_GPU_PARALLEL_LOOP() - elseif (norm_dir == 3) then - $:GPU_PARALLEL_LOOP(collapse=4) - do i = iv%beg, iv%end - do j = is1_viscous%beg, is1_viscous%end - do k = is2_viscous%beg, is2_viscous%end - do l = is3_viscous%beg, is3_viscous%end - vL_prim_vf(i)%sf(l, k, j) = vL_z(j, k, l, i) - vR_prim_vf(i)%sf(l, k, j) = vR_z(j, k, l, i) - end do - end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + elseif (norm_dir == 3) then + $:GPU_PARALLEL_LOOP(collapse=4) + do i = iv%beg, iv%end + do j = is1_viscous%beg, is1_viscous%end + do k = is2_viscous%beg, is2_viscous%end + do l = is3_viscous%beg, is3_viscous%end + vL_prim_vf(i)%sf(l, k, j) = vL_z(j, k, l, i) + vR_prim_vf(i)%sf(l, k, j) = vR_z(j, k, l, i) end do end do - $:END_GPU_PARALLEL_LOOP() - elseif (norm_dir == 1) then - $:GPU_PARALLEL_LOOP(collapse=4) - do i = iv%beg, iv%end - do l = is3_viscous%beg, is3_viscous%end - do k = is2_viscous%beg, is2_viscous%end - do j = is1_viscous%beg, is1_viscous%end - vL_prim_vf(i)%sf(j, k, l) = vL_x(j, k, l, i) - vR_prim_vf(i)%sf(j, k, l) = vR_x(j, k, l, i) - end do - end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + elseif (norm_dir == 1) then + $:GPU_PARALLEL_LOOP(collapse=4) + do i = iv%beg, iv%end + do l = is3_viscous%beg, is3_viscous%end + do k = is2_viscous%beg, is2_viscous%end + do j = is1_viscous%beg, is1_viscous%end + vL_prim_vf(i)%sf(j, k, l) = vL_x(j, k, l, i) + vR_prim_vf(i)%sf(j, k, l) = vR_x(j, k, l, i) end do end do - $:END_GPU_PARALLEL_LOOP() - end if - end if + end do + end do + $:END_GPU_PARALLEL_LOOP() end if end if - #:endfor + end if end subroutine s_reconstruct_cell_boundary_values_visc @@ -1152,7 +1159,8 @@ contains is1_viscous, is2_viscous, is3_viscous) end if - if (viscous) then + if (viscous .or. dummy) then + #:if not MFC_CASE_OPTIMIZATION or viscous if (weno_Re_flux) then if (norm_dir == 2) then $:GPU_PARALLEL_LOOP(collapse=4) @@ -1195,6 +1203,7 @@ contains $:END_GPU_PARALLEL_LOOP() end if end if + #:endif end if end if diff --git a/src/simulation/m_weno.fpp b/src/simulation/m_weno.fpp index 3422dab8ec..6e8b668326 100644 --- a/src/simulation/m_weno.fpp +++ b/src/simulation/m_weno.fpp @@ -647,12 +647,21 @@ contains integer, intent(in) :: weno_dir type(int_bounds_info), intent(in) :: is1_weno_d, is2_weno_d, is3_weno_d +#:if USING_AMD and not MFC_CASE_OPTIMIZATION + real(wp), dimension(-3:2) :: dvd + real(wp), dimension(0:4) :: poly + real(wp), dimension(0:4) :: alpha + real(wp), dimension(0:4) :: omega + real(wp), dimension(0:4) :: beta + real(wp), dimension(0:4) :: delta +#:else real(wp), dimension(-weno_polyn:weno_polyn - 1) :: dvd real(wp), dimension(0:weno_num_stencils) :: poly real(wp), dimension(0:weno_num_stencils) :: alpha real(wp), dimension(0:weno_num_stencils) :: omega real(wp), dimension(0:weno_num_stencils) :: beta real(wp), dimension(0:weno_num_stencils) :: delta +#:endif real(wp), dimension(-3:3) :: v ! temporary field value array for clarity (WENO7 only) real(wp) :: tau @@ -664,12 +673,12 @@ contains $:GPU_UPDATE(device='[is1_weno,is2_weno,is3_weno]') - if (weno_order /= 1) then + if (weno_order /= 1 .or. dummy) then call s_initialize_weno(v_vf, & weno_dir) end if - if (weno_order == 1) then + if (weno_order == 1 .or. dummy) then if (weno_dir == 1) then $:GPU_PARALLEL_LOOP(collapse=4) do i = 1, ubound(v_vf, 1) @@ -710,7 +719,8 @@ contains end do $:END_GPU_PARALLEL_LOOP() end if - elseif (weno_order == 3) then + end if + if (weno_order == 3 .or. dummy) then #:for WENO_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] if (weno_dir == ${WENO_DIR}$) then $:GPU_PARALLEL_LOOP(collapse=4,private='[beta,dvd,poly,omega,alpha,tau]') @@ -720,6 +730,10 @@ contains do i = 1, v_size ! reconstruct from left side + alpha(:) = 0._wp + omega(:) = 0._wp + beta(:) = weno_eps + dvd(0) = v_rs_ws_${XYZ}$ (j + 1, k, l, i) & - v_rs_ws_${XYZ}$ (j, k, l, i) dvd(-1) = v_rs_ws_${XYZ}$ (j, k, l, i) & @@ -736,19 +750,19 @@ contains + weno_eps if (wenojs) then - alpha = d_cbL_${XYZ}$ (:, j)/(beta*beta) + alpha(0:weno_num_stencils) = d_cbL_${XYZ}$ (0:weno_num_stencils, j)/(beta(0:weno_num_stencils)**2._wp) elseif (mapped_weno) then - alpha = d_cbL_${XYZ}$ (:, j)/(beta*beta) + alpha(0:weno_num_stencils) = d_cbL_${XYZ}$ (0:weno_num_stencils, j)/(beta(0:weno_num_stencils)**2._wp) omega = alpha/sum(alpha) - alpha = (d_cbL_${XYZ}$ (:, j)*(1._wp + d_cbL_${XYZ}$ (:, j) - 3._wp*omega) + omega**2._wp) & - *(omega/(d_cbL_${XYZ}$ (:, j)**2._wp + omega*(1._wp - 2._wp*d_cbL_${XYZ}$ (:, j)))) + alpha(0:weno_num_stencils) = (d_cbL_${XYZ}$ (0:weno_num_stencils, j)*(1._wp + d_cbL_${XYZ}$ (0:weno_num_stencils, j) - 3._wp*omega(0:weno_num_stencils)) + omega(0:weno_num_stencils)**2._wp) & + *(omega(0:weno_num_stencils)/(d_cbL_${XYZ}$ (0:weno_num_stencils, j)**2._wp + omega(0:weno_num_stencils)*(1._wp - 2._wp*d_cbL_${XYZ}$ (0:weno_num_stencils, j)))) elseif (wenoz) then ! Borges, et al. (2008) tau = abs(beta(1) - beta(0)) - alpha = d_cbL_${XYZ}$ (:, j)*(1._wp + tau/beta) + alpha(0:weno_num_stencils) = d_cbL_${XYZ}$ (0:weno_num_stencils, j)*(1._wp + tau/beta(0:weno_num_stencils)) end if @@ -764,17 +778,17 @@ contains + poly_coef_cbR_${XYZ}$ (j, 1, 0)*dvd(-1) if (wenojs) then - alpha = d_cbR_${XYZ}$ (:, j)/(beta*beta) + alpha(0:weno_num_stencils) = d_cbR_${XYZ}$ (0:weno_num_stencils, j)/(beta(0:weno_num_stencils)**2._wp) elseif (mapped_weno) then - alpha = d_cbR_${XYZ}$ (:, j)/(beta*beta) + alpha(0:weno_num_stencils) = d_cbR_${XYZ}$ (0:weno_num_stencils, j)/(beta(0:weno_num_stencils)**2._wp) omega = alpha/sum(alpha) - alpha = (d_cbR_${XYZ}$ (:, j)*(1._wp + d_cbR_${XYZ}$ (:, j) - 3._wp*omega) + omega**2._wp) & - *(omega/(d_cbR_${XYZ}$ (:, j)**2._wp + omega*(1._wp - 2._wp*d_cbR_${XYZ}$ (:, j)))) + alpha(0:weno_num_stencils) = (d_cbR_${XYZ}$ (0:weno_num_stencils, j)*(1._wp + d_cbR_${XYZ}$ (0:weno_num_stencils, j) - 3._wp*omega(0:weno_num_stencils)) + omega(0:weno_num_stencils)**2._wp) & + *(omega(0:weno_num_stencils)/(d_cbR_${XYZ}$ (0:weno_num_stencils, j)**2._wp + omega(0:weno_num_stencils)*(1._wp - 2._wp*d_cbR_${XYZ}$ (0:weno_num_stencils, j)))) elseif (wenoz) then - alpha = d_cbR_${XYZ}$ (:, j)*(1._wp + tau/beta) + alpha(0:weno_num_stencils) = d_cbR_${XYZ}$ (0:weno_num_stencils, j)*(1._wp + tau/beta(0:weno_num_stencils)) end if @@ -789,7 +803,8 @@ contains $:END_GPU_PARALLEL_LOOP() end if #:endfor - elseif (weno_order == 5) then + end if + if (weno_order == 5 .or. dummy) then #:if not MFC_CASE_OPTIMIZATION or weno_num_stencils > 1 #:for WENO_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] if (weno_dir == ${WENO_DIR}$) then @@ -834,29 +849,30 @@ contains + weno_eps if (wenojs) then - alpha = d_cbL_${XYZ}$ (:, j)/(beta*beta) + alpha(0:weno_num_stencils) = d_cbL_${XYZ}$ (0:weno_num_stencils, j)/(beta(0:weno_num_stencils)**2._wp) elseif (mapped_weno) then - alpha = d_cbL_${XYZ}$ (:, j)/(beta*beta) + alpha(0:weno_num_stencils) = d_cbL_${XYZ}$ (0:weno_num_stencils, j)/(beta(0:weno_num_stencils)**2._wp) omega = alpha/sum(alpha) - alpha = (d_cbL_${XYZ}$ (:, j)*(1._wp + d_cbL_${XYZ}$ (:, j) - 3._wp*omega) + omega**2._wp) & - *(omega/(d_cbL_${XYZ}$ (:, j)**2._wp + omega*(1._wp - 2._wp*d_cbL_${XYZ}$ (:, j)))) + alpha(0:weno_num_stencils) = (d_cbL_${XYZ}$ (0:weno_num_stencils, j)*(1._wp + d_cbL_${XYZ}$ (0:weno_num_stencils, j) - 3._wp*omega(0:weno_num_stencils)) + omega(0:weno_num_stencils)**2._wp) & + *(omega(0:weno_num_stencils)/(d_cbL_${XYZ}$ (0:weno_num_stencils, j)**2._wp + omega(0:weno_num_stencils)*(1._wp - 2._wp*d_cbL_${XYZ}$ (0:weno_num_stencils, j)))) elseif (wenoz) then + ! Borges, et al. (2008) tau = abs(beta(2) - beta(0)) ! Equation 25 - alpha = d_cbL_${XYZ}$ (:, j)*(1._wp + tau/beta) ! Equation 28 (note: weno_eps was already added to beta) + alpha(0:weno_num_stencils) = d_cbL_${XYZ}$ (0:weno_num_stencils, j)*(1._wp + tau/beta(0:weno_num_stencils)) ! Equation 28 (note: weno_eps was already added to beta) elseif (teno) then - ! Fu, et al. (2016) + ! Fu, et al. (2016) ! Fu''s code: https://dx.doi.org/10.13140/RG.2.2.36250.34247 tau = abs(beta(2) - beta(0)) alpha = 1._wp + tau/beta ! Equation 22 (reuse alpha as gamma; pick C=1 & q=6) alpha = (alpha*alpha*alpha)**2._wp ! Equation 22 cont. (some CPU compilers cannot optimize x**6.0) omega = alpha/sum(alpha) ! Equation 25 (reuse omega as xi) delta = merge(0._wp, 1._wp, omega < teno_CT)! Equation 26 - alpha = delta*d_cbL_${XYZ}$ (:, j) ! Equation 27 + alpha(0:weno_num_stencils) = delta(0:weno_num_stencils)*d_cbL_${XYZ}$ (0:weno_num_stencils, j) ! Equation 27 end if @@ -876,21 +892,22 @@ contains + poly_coef_cbR_${XYZ}$ (j, 2, 0)*dvd(-1) & + poly_coef_cbR_${XYZ}$ (j, 2, 1)*dvd(-2) + if (wenojs) then - alpha = d_cbR_${XYZ}$ (:, j)/(beta*beta) + alpha(0:weno_num_stencils) = d_cbR_${XYZ}$ (0:weno_num_stencils, j)/(beta(0:weno_num_stencils)**2._wp) elseif (mapped_weno) then - alpha = d_cbR_${XYZ}$ (:, j)/(beta*beta) + alpha(0:weno_num_stencils) = d_cbR_${XYZ}$ (0:weno_num_stencils, j)/(beta(0:weno_num_stencils)**2._wp) omega = alpha/sum(alpha) - alpha = (d_cbR_${XYZ}$ (:, j)*(1._wp + d_cbR_${XYZ}$ (:, j) - 3._wp*omega) + omega**2._wp) & - *(omega/(d_cbR_${XYZ}$ (:, j)**2._wp + omega*(1._wp - 2._wp*d_cbR_${XYZ}$ (:, j)))) + alpha(0:weno_num_stencils) = (d_cbR_${XYZ}$ (0:weno_num_stencils, j)*(1._wp + d_cbR_${XYZ}$ (0:weno_num_stencils, j) - 3._wp*omega(0:weno_num_stencils)) + omega(0:weno_num_stencils)**2._wp) & + *(omega(0:weno_num_stencils)/(d_cbR_${XYZ}$ (0:weno_num_stencils, j)**2._wp + omega(0:weno_num_stencils)*(1._wp - 2._wp*d_cbR_${XYZ}$ (0:weno_num_stencils, j)))) elseif (wenoz) then - alpha = d_cbR_${XYZ}$ (:, j)*(1._wp + tau/beta) + alpha(0:weno_num_stencils) = d_cbR_${XYZ}$ (0:weno_num_stencils, j)*(1._wp + tau/beta(0:weno_num_stencils)) elseif (teno) then - alpha = delta*d_cbR_${XYZ}$ (:, j) + alpha(0:weno_num_stencils) = delta(0:weno_num_stencils)*d_cbR_${XYZ}$ (0:weno_num_stencils, j) end if @@ -911,7 +928,8 @@ contains end if #:endfor #:endif - elseif (weno_order == 7) then + end if + if (weno_order == 7 .or. dummy) then #:if not MFC_CASE_OPTIMIZATION or weno_num_stencils > 2 #:for WENO_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] if (weno_dir == ${WENO_DIR}$) then @@ -1026,28 +1044,28 @@ contains end if if (wenojs) then - alpha = d_cbL_${XYZ}$ (:, j)/(beta*beta) + alpha(0:weno_num_stencils) = d_cbL_${XYZ}$ (0:weno_num_stencils, j)/(beta(0:weno_num_stencils)**2._wp) elseif (mapped_weno) then - alpha = d_cbL_${XYZ}$ (:, j)/(beta*beta) + alpha(0:weno_num_stencils) = d_cbL_${XYZ}$ (0:weno_num_stencils, j)/(beta(0:weno_num_stencils)**2._wp) omega = alpha/sum(alpha) - alpha = (d_cbL_${XYZ}$ (:, j)*(1._wp + d_cbL_${XYZ}$ (:, j) - 3._wp*omega) + omega**2._wp) & - *(omega/(d_cbL_${XYZ}$ (:, j)**2._wp + omega*(1._wp - 2._wp*d_cbL_${XYZ}$ (:, j)))) + alpha(0:weno_num_stencils) = (d_cbL_${XYZ}$ (0:weno_num_stencils, j)*(1._wp + d_cbL_${XYZ}$ (0:weno_num_stencils, j) - 3._wp*omega(0:weno_num_stencils)) + omega(0:weno_num_stencils)**2._wp) & + *(omega(0:weno_num_stencils)/(d_cbL_${XYZ}$ (0:weno_num_stencils, j)**2._wp + omega(0:weno_num_stencils)*(1._wp - 2._wp*d_cbL_${XYZ}$ (0:weno_num_stencils, j)))) elseif (wenoz) then ! Castro, et al. (2010) ! Don & Borges (2013) also helps tau = abs(beta(3) - beta(0)) ! Equation 50 - alpha = d_cbL_${XYZ}$ (:, j)*(1._wp + (tau/beta)**wenoz_q) ! q = 2,3,4 for stability + alpha(0:weno_num_stencils) = d_cbL_${XYZ}$ (0:weno_num_stencils, j)*(1._wp + (tau/beta(0:weno_num_stencils))**wenoz_q) ! q = 2,3,4 for stability elseif (teno) then #:if not MFC_CASE_OPTIMIZATION or weno_num_stencils > 3 tau = abs(beta(4) - beta(3)) ! Note the reordering of stencils alpha = 1._wp + tau/beta - alpha = (alpha*alpha*alpha)**2._wp ! some CPU compilers cannot optimize x**6.0 + alpha = (alpha**3._wp)**2._wp ! some CPU compilers cannot optimize x**6.0 omega = alpha/sum(alpha) delta = merge(0._wp, 1._wp, omega < teno_CT) - alpha = delta*d_cbL_${XYZ}$ (:, j) + alpha(0:weno_num_stencils) = delta(0:weno_num_stencils)*d_cbL_${XYZ}$ (0:weno_num_stencils, j) #:endif end if @@ -1083,19 +1101,20 @@ contains end if if (wenojs) then - alpha = d_cbR_${XYZ}$ (:, j)/(beta*beta) + alpha(0:weno_num_stencils) = d_cbR_${XYZ}$ (0:weno_num_stencils, j)/(beta(0:weno_num_stencils)**2._wp) elseif (mapped_weno) then - alpha = d_cbR_${XYZ}$ (:, j)/(beta*beta) + alpha(0:weno_num_stencils) = d_cbR_${XYZ}$ (0:weno_num_stencils, j)/(beta(0:weno_num_stencils)**2._wp) omega = alpha/sum(alpha) - alpha = (d_cbR_${XYZ}$ (:, j)*(1._wp + d_cbR_${XYZ}$ (:, j) - 3._wp*omega) + omega**2._wp) & - *(omega/(d_cbR_${XYZ}$ (:, j)**2._wp + omega*(1._wp - 2._wp*d_cbR_${XYZ}$ (:, j)))) + alpha(0:weno_num_stencils) = (d_cbR_${XYZ}$ (0:weno_num_stencils, j)*(1._wp + d_cbR_${XYZ}$ (0:weno_num_stencils, j) - 3._wp*omega(0:weno_num_stencils)) + omega(0:weno_num_stencils)**2._wp) & + *(omega(0:weno_num_stencils)/(d_cbR_${XYZ}$ (0:weno_num_stencils, j)**2._wp + omega(0:weno_num_stencils)*(1._wp - 2._wp*d_cbR_${XYZ}$ (0:weno_num_stencils, j)))) elseif (wenoz) then - alpha = d_cbR_${XYZ}$ (:, j)*(1._wp + (tau/beta)**wenoz_q) + + alpha(0:weno_num_stencils) = d_cbR_${XYZ}$ (0:weno_num_stencils, j)*(1._wp + tau/beta(0:weno_num_stencils)) elseif (teno) then - alpha = delta*d_cbR_${XYZ}$ (:, j) + alpha(0:weno_num_stencils) = delta(0:weno_num_stencils)*d_cbR_${XYZ}$ (0:weno_num_stencils, j) end if diff --git a/toolchain/mfc/case.py b/toolchain/mfc/case.py index 269c8438bf..643dd4d188 100644 --- a/toolchain/mfc/case.py +++ b/toolchain/mfc/case.py @@ -240,8 +240,8 @@ def __get_sim_fpp(self, print: bool) -> str: #:set recon_type = {recon_type} #:set weno_order = {weno_order} #:set weno_polyn = {weno_polyn} -#:set muscl_order = {int(self.params.get("muscl_order", 0))} -#:set muscl_polyn = {int(self.params.get("muscl_order", 0))} +#:set muscl_order = {int(self.params.get("muscl_order", 1))} +#:set muscl_polyn = {int(self.params.get("muscl_order", 1))} #:set muscl_lim = {int(self.params.get("muscl_lim", 1))} #:set weno_num_stencils = {weno_num_stencils} #:set nb = {int(self.params.get("nb", 1))} From 13bb65ca58b2abf0d0a4299899b58f502c0e3eb2 Mon Sep 17 00:00:00 2001 From: Anand Radhakrishnan Date: Sun, 25 Jan 2026 08:26:05 -0500 Subject: [PATCH 02/21] 95% of test suite passes --- CMakeLists.txt | 4 +- src/common/m_boundary_common.fpp | 33 +- src/common/m_chemistry.fpp | 14 +- src/common/m_phase_change.fpp | 77 +++-- src/common/m_variables_conversion.fpp | 65 +++- src/simulation/include/inline_riemann.fpp | 24 +- src/simulation/m_acoustic_src.fpp | 5 +- src/simulation/m_bubbles_EE.fpp | 7 +- src/simulation/m_bubbles_EL.fpp | 12 +- src/simulation/m_cbc.fpp | 27 +- src/simulation/m_compute_cbc.fpp | 116 +++++-- src/simulation/m_data_output.fpp | 2 +- src/simulation/m_hyperelastic.fpp | 6 +- src/simulation/m_ibm.fpp | 14 +- src/simulation/m_igr.fpp | 380 +++++++++++----------- src/simulation/m_pressure_relaxation.fpp | 9 +- src/simulation/m_qbmm.fpp | 51 ++- src/simulation/m_rhs.fpp | 4 +- src/simulation/m_riemann_solvers.fpp | 110 +++++-- src/simulation/m_sim_helpers.fpp | 4 +- src/simulation/m_surface_tension.fpp | 7 +- src/simulation/m_time_steppers.fpp | 13 +- src/simulation/m_viscous.fpp | 88 ++--- src/simulation/m_weno.fpp | 100 ++++-- toolchain/mfc/test/case.py | 2 + 25 files changed, 786 insertions(+), 388 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 0507754dce..2c63582a3e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -596,8 +596,8 @@ function(MFC_SETUP_TARGET) PRIVATE -DFRONTIER_UNIFIED) endif() - find_package(hipfort COMPONENTS hip CONFIG REQUIRED) - target_link_libraries(${a_target} PRIVATE hipfort::hip hipfort::hipfort-amdgcn flang_rt.hostdevice) + find_package(hipfort COMPONENTS hip CONFIG REQUIRED) + target_link_libraries(${a_target} PRIVATE hipfort::hip hipfort::hipfort-amdgcn flang_rt.hostdevice) endif() elseif (CMAKE_Fortran_COMPILER_ID STREQUAL "Cray") target_compile_options(${a_target} PRIVATE "SHELL:-h noacc" "SHELL:-x acc") diff --git a/src/common/m_boundary_common.fpp b/src/common/m_boundary_common.fpp index 4fccf3aa68..333df31442 100644 --- a/src/common/m_boundary_common.fpp +++ b/src/common/m_boundary_common.fpp @@ -44,7 +44,7 @@ module m_boundary_common s_populate_grid_variables_buffers, & s_finalize_boundary_common_module - public :: bc_buffers + !public :: bc_buffers #ifdef MFC_MPI public :: MPI_BC_TYPE_TYPE, MPI_BC_BUFFER_TYPE @@ -54,24 +54,31 @@ contains impure subroutine s_initialize_boundary_common_module() + integer :: i, j + @:ALLOCATE(bc_buffers(1:num_dims, 1:2)) if (bc_io) then @:ALLOCATE(bc_buffers(1, 1)%sf(1:sys_size, 0:n, 0:p)) @:ALLOCATE(bc_buffers(1, 2)%sf(1:sys_size, 0:n, 0:p)) - @:ACC_SETUP_SFs(bc_buffers(1,1), bc_buffers(1,2)) + #:if not MFC_CASE_OPTIMIZATION or num_dims > 1 if (n > 0) then @:ALLOCATE(bc_buffers(2,1)%sf(-buff_size:m+buff_size,1:sys_size,0:p)) - @:ALLOCATE(bc_buffers(2,2)%sf(-buff_size:m+buff_size,1:sys_size,0:p)) - @:ACC_SETUP_SFs(bc_buffers(2,1), bc_buffers(2,2)) - if (p > 0) then - #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 - @:ALLOCATE(bc_buffers(3,1)%sf(-buff_size:m+buff_size,-buff_size:n+buff_size,1:sys_size)) - @:ALLOCATE(bc_buffers(3,2)%sf(-buff_size:m+buff_size,-buff_size:n+buff_size,1:sys_size)) - @:ACC_SETUP_SFs(bc_buffers(3,1), bc_buffers(3,2)) - #:endif - end if + @:ALLOCATE(bc_buffers(2,2)%sf(-buff_size:m+buff_size,1:sys_size,0:p)) + #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 + if (p > 0) then + @:ALLOCATE(bc_buffers(3,1)%sf(-buff_size:m+buff_size,-buff_size:n+buff_size,1:sys_size)) + @:ALLOCATE(bc_buffers(3,2)%sf(-buff_size:m+buff_size,-buff_size:n+buff_size,1:sys_size)) + end if + #:endif end if + #:endif + do i = 1, num_dims + do j = 1, 2 + @:ACC_SETUP_SFs(bc_buffers(i,j)) + end do + end do + end if end subroutine s_initialize_boundary_common_module @@ -1056,6 +1063,7 @@ contains end do end if elseif (bc_dir == 2) then !< y-direction +#:if not MFC_CASE_OPTIMIZATION or num_dims > 1 if (bc_loc == -1) then !< bc_y%beg do i = 1, sys_size do j = 1, buff_size @@ -1071,7 +1079,9 @@ contains end do end do end if +#:endif elseif (bc_dir == 3) then !< z-direction +#:if not MFC_CASE_OPTIMIZATION or num_dims > 2 if (bc_loc == -1) then !< bc_z%beg do i = 1, sys_size do j = 1, buff_size @@ -1087,6 +1097,7 @@ contains end do end do end if +#:endif end if #else call s_ghost_cell_extrapolation(q_prim_vf, bc_dir, bc_loc, k, l) diff --git a/src/common/m_chemistry.fpp b/src/common/m_chemistry.fpp index 5d3d1b7811..4e89ea4a8f 100644 --- a/src/common/m_chemistry.fpp +++ b/src/common/m_chemistry.fpp @@ -126,8 +126,13 @@ contains integer :: eqn real(wp) :: T real(wp) :: rho, omega_m +#:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(10) :: Ys + real(wp), dimension(10) :: omega +#:else real(wp), dimension(num_species) :: Ys real(wp), dimension(num_species) :: omega +#:endif $:GPU_PARALLEL_LOOP(collapse=3, private='[Ys, omega, eqn, T, rho, omega_m]', copyin='[bounds]') do z = bounds(3)%beg, bounds(3)%end @@ -169,11 +174,18 @@ contains type(int_bounds_info), intent(in) :: irx, iry, irz integer, intent(in) :: idir - +#:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(10) :: Xs_L, Xs_R, Xs_cell, Ys_L, Ys_R, Ys_cell + real(wp), dimension(10) :: mass_diffusivities_mixavg1, mass_diffusivities_mixavg2 + real(wp), dimension(10) :: mass_diffusivities_mixavg_Cell, dXk_dxi, h_l, h_r, h_k + real(wp), dimension(10) :: Mass_Diffu_Flux, dYk_dxi +#:else real(wp), dimension(num_species) :: Xs_L, Xs_R, Xs_cell, Ys_L, Ys_R, Ys_cell real(wp), dimension(num_species) :: mass_diffusivities_mixavg1, mass_diffusivities_mixavg2 real(wp), dimension(num_species) :: mass_diffusivities_mixavg_Cell, dXk_dxi, h_l, h_r, h_k real(wp), dimension(num_species) :: Mass_Diffu_Flux, dYk_dxi +#:endif + real(wp) :: Mass_Diffu_Energy real(wp) :: MW_L, MW_R, MW_cell, Rgas_L, Rgas_R, T_L, T_R, P_L, P_R, rho_L, rho_R, rho_cell, rho_Vic real(wp) :: lambda_L, lambda_R, lambda_Cell, dT_dxi, grid_spacing diff --git a/src/common/m_phase_change.fpp b/src/common/m_phase_change.fpp index cba9744427..7a88a7b323 100644 --- a/src/common/m_phase_change.fpp +++ b/src/common/m_phase_change.fpp @@ -1,6 +1,6 @@ !> energies (6-eqn to 4-eqn) equilibrium through an infinitely fast (algebraic) !> procedure. - +#:include 'case.fpp' #:include 'macros.fpp' module m_phase_change @@ -91,15 +91,18 @@ contains ! $:GPU_DECLARE(create='[pS,pSOV,pSSL,TS,TSOV,TSSL,TSatOV,TSatSL]') ! $:GPU_DECLARE(create='[rhoe,dynE,rhos,rho,rM,m1,m2,MCT,TvF]') - +#:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(3) :: p_infOV, p_infpT, p_infSL, sk, hk, gk, ek, rhok +#:else real(wp), dimension(num_fluids) :: p_infOV, p_infpT, p_infSL, sk, hk, gk, ek, rhok +#:endif ! $:GPU_DECLARE(create='[p_infOV,p_infpT,p_infSL,sk,hk,gk,ek,rhok]') !< Generic loop iterators integer :: i, j, k, l ! starting equilibrium solver - $:GPU_PARALLEL_LOOP(collapse=3, private='[j,k,l,p_infOV, p_infpT, p_infSL, sk, hk, gk, ek, rhok,pS, pSOV, pSSL, TS, TSOV, TSatOV, TSatSL, TSSL, rhoe, dynE, rhos, rho, rM, m1, m2, MCT, TvF]') + $:GPU_PARALLEL_LOOP(collapse=3, private='[i,j,k,l,p_infOV, p_infpT, p_infSL, sk, hk, gk, ek, rhok,pS, pSOV, pSSL, TS, TSOV, TSatOV, TSatSL, TSSL, rhoe, dynE, rhos, rho, rM, m1, m2, MCT, TvF]') do j = 0, m do k = 0, n do l = 0, p @@ -231,25 +234,28 @@ contains ! Calculations AFTER equilibrium - ! entropy - sk(1:num_fluids) = cvs(1:num_fluids)*log((TS**gs_min(1:num_fluids)) & - /((pS + ps_inf(1:num_fluids))**(gs_min(1:num_fluids) - 1.0_wp))) + qvps(1:num_fluids) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + ! entropy + sk(i) = cvs(i)*log((TS**gs_min(i)) & + /((pS + ps_inf(i))**(gs_min(i) - 1.0_wp))) + qvps(i) - ! enthalpy - hk(1:num_fluids) = gs_min(1:num_fluids)*cvs(1:num_fluids)*TS & - + qvs(1:num_fluids) + ! enthalpy + hk(i) = gs_min(i)*cvs(i)*TS & + + qvs(i) - ! Gibbs-free energy - gk(1:num_fluids) = hk(1:num_fluids) - TS*sk(1:num_fluids) + ! Gibbs-free energy + gk(i) = hk(i) - TS*sk(i) - ! densities - rhok(1:num_fluids) = (pS + ps_inf(1:num_fluids)) & - /((gs_min(1:num_fluids) - 1)*cvs(1:num_fluids)*TS) + ! densities + rhok(i) = (pS + ps_inf(i)) & + /((gs_min(i) - 1)*cvs(i)*TS) - ! internal energy - ek(1:num_fluids) = (pS + gs_min(1:num_fluids) & - *ps_inf(1:num_fluids))/(pS + ps_inf(1:num_fluids)) & - *cvs(1:num_fluids)*TS + qvs(1:num_fluids) + ! internal energy + ek(i) = (pS + gs_min(i) & + *ps_inf(i))/(pS + ps_inf(i)) & + *cvs(i)*TS + qvs(i) + end do ! calculating volume fractions, internal energies, and total entropy rhos = 0.0_wp @@ -293,7 +299,11 @@ contains ! initializing variables integer, intent(in) :: j, k, l, MFL real(wp), intent(out) :: pS - real(wp), dimension(num_fluids), intent(out) :: p_infpT +#:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(3), intent(out) :: p_infpT +#:else + real(wp), dimension(num_fluids), intent(out) :: p_infpT +#:endif type(scalar_field), dimension(sys_size), intent(in) :: q_cons_vf real(wp), intent(in) :: rhoe real(wp), intent(out) :: TS @@ -302,7 +312,11 @@ contains integer :: i, ns !< generic loop iterators ! auxiliary variables for the pT-equilibrium solver - mCP = 0.0_wp; mQ = 0.0_wp; p_infpT = ps_inf; + mCP = 0.0_wp; mQ = 0.0_wp; + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + p_infpT(i) = ps_inf(i) + end do ! Performing tests before initializing the pT-equilibrium $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids @@ -315,6 +329,13 @@ contains end do + if(num_fluids < 3) then + $:GPU_LOOP(parallelism='[seq]') + do i = num_fluids+1, 3 + p_infpT(i) = 10**32_wp + end do + end if + ! Checking energy constraint if ((rhoe - mQ - minval(p_infpT)) < 0.0_wp) then @@ -392,12 +413,19 @@ contains integer, intent(in) :: j, k, l real(wp), intent(inout) :: pS - real(wp), dimension(num_fluids), intent(in) :: p_infpT +#:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(3), intent(in) :: p_infpT +#:else + real(wp), dimension(num_fluids), intent(in) :: p_infpT +#:endif real(wp), intent(in) :: rhoe type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf real(wp), intent(inout) :: TS - - real(wp), dimension(num_fluids) :: p_infpTg !< stiffness for the participating fluids for pTg-equilibrium +#:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(3) :: p_infpTg !< stiffness for the participating fluids for pTg-equilibrium +#:else + real(wp), dimension(num_fluids) :: p_infpTg !< stiffness for the participating fluids for pTg-equilibrium +#:endif real(wp), dimension(2, 2) :: Jac, InvJac, TJac !< matrices for the Newton Solver real(wp), dimension(2) :: R2D, DeltamP !< residual and correction array real(wp) :: Om ! underrelaxation factor @@ -477,7 +505,8 @@ contains call s_compute_jacobian_matrix(InvJac, j, Jac, k, l, mCPD, mCVGP, mCVGP2, pS, q_cons_vf, TJac) ! calculating correction array for Newton's method - DeltamP = -1.0_wp*matmul(InvJac, R2D) + DeltamP(1) = -1.0_wp*(InvJac(1,1)*R2D(1) + InvJac(1,2)*R2D(2)) + DeltamP(2) = -1.0_wp*(InvJac(2,1)*R2D(1) + InvJac(2,2)*R2D(2)) ! updating two reacting 'masses'. Recall that inert 'masses' do not change during the phase change ! liquid diff --git a/src/common/m_variables_conversion.fpp b/src/common/m_variables_conversion.fpp index 144e7bce95..beb38bacf5 100644 --- a/src/common/m_variables_conversion.fpp +++ b/src/common/m_variables_conversion.fpp @@ -124,10 +124,11 @@ contains ! Chemistry real(wp), dimension(1:num_species), intent(in) :: rhoYks + real(wp), dimension(1:num_species) :: Y_rs real(wp) :: E_e real(wp) :: e_Per_Kg, Pdyn_Per_Kg real(wp) :: T_guess - real(wp), dimension(1:num_species) :: Y_rs + integer :: s !< Generic loop iterator @@ -251,9 +252,10 @@ contains real(wp), intent(out), target :: qv real(wp), optional, dimension(2), intent(out) :: Re_K - real(wp), optional, intent(out) :: G_K + real(wp), optional, intent(out) :: G_K + real(wp), dimension(num_fluids), intent(inout) :: alpha_rho_K, alpha_K !< real(wp), optional, dimension(num_fluids), intent(in) :: G - real(wp), dimension(num_fluids) :: alpha_rho_K, alpha_K !< + integer :: i, j !< Generic loop iterator @@ -321,10 +323,17 @@ contains & parallelism='[seq]', cray_inline=True) real(wp), intent(out) :: rho_K, gamma_K, pi_inf_K, qv_K +#:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(3), intent(inout) :: alpha_rho_K, alpha_K !< + real(wp), optional, dimension(3), intent(in) :: G +#:else real(wp), dimension(num_fluids), intent(inout) :: alpha_rho_K, alpha_K !< + real(wp), optional, dimension(num_fluids), intent(in) :: G +#:endif real(wp), dimension(2), intent(out) :: Re_K real(wp), optional, intent(out) :: G_K - real(wp), optional, dimension(num_fluids), intent(in) :: G + real(wp) :: alpha_K_sum + integer :: i, j !< Generic loop iterators @@ -340,11 +349,13 @@ contains qv_K = qvs(1) else if (mpp_lim) then + alpha_K_sum = 0._wp do i = 1, num_fluids alpha_rho_K(i) = max(0._wp, alpha_rho_K(i)) alpha_K(i) = min(max(0._wp, alpha_K(i)), 1._wp) + alpha_K_sum = alpha_K_sum + alpha_K(i) end do - alpha_K = alpha_K/max(sum(alpha_K), sgm_eps) + alpha_K = alpha_K/max(alpha_K_sum, sgm_eps) end if rho_K = 0._wp; gamma_K = 0._wp; pi_inf_K = 0._wp; qv_K = 0._wp do i = 1, num_fluids @@ -579,13 +590,17 @@ contains type(scalar_field), intent(inout) :: q_T_sf type(scalar_field), dimension(sys_size), intent(inout) :: qK_prim_vf type(int_bounds_info), dimension(1:3), intent(in) :: ibounds - +#:if USING_AMD and not MFC_CASE_OPTIMIZATION + real(wp), dimension(3) :: alpha_K, alpha_rho_K + real(wp), dimension(3) :: nRtmp + real(wp) :: rhoYks(1:10) +#:else real(wp), dimension(num_fluids) :: alpha_K, alpha_rho_K + real(wp), dimension(nb) :: nRtmp + real(wp) :: rhoYks(1:num_species) +#:endif real(wp), dimension(2) :: Re_K real(wp) :: rho_K, gamma_K, pi_inf_K, qv_K, dyn_pres_K - real(wp), dimension(nb) :: nRtmp - - real(wp) :: rhoYks(1:num_species) real(wp) :: vftmp, nbub_sc @@ -1166,19 +1181,27 @@ contains is1, is2, is3, s2b, s3b) integer, intent(in) :: s2b, s3b - real(wp), dimension(0:, s2b:, s3b:, 1:), intent(in) :: qK_prim_vf - real(wp), dimension(0:, s2b:, s3b:, 1:), intent(inout) :: FK_vf - real(wp), dimension(0:, s2b:, s3b:, advxb:), intent(inout) :: FK_src_vf + real(wp), dimension(0:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:), intent(in) :: qK_prim_vf + real(wp), dimension(0:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:), intent(inout) :: FK_vf + real(wp), dimension(0:, idwbuff(2)%beg:, idwbuff(3)%beg:, advxb:), intent(inout) :: FK_src_vf type(int_bounds_info), intent(in) :: is1, is2, is3 ! Partial densities, density, velocity, pressure, energy, advection ! variables, the specific heat ratio and liquid stiffness functions, ! the shear and volume Reynolds numbers and the Weber numbers +#:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(3) :: alpha_rho_K + real(wp), dimension(3) :: alpha_K + real(wp), dimension(3) :: vel_K + real(wp), dimension(10) :: Y_K +#:else real(wp), dimension(num_fluids) :: alpha_rho_K real(wp), dimension(num_fluids) :: alpha_K - real(wp) :: rho_K real(wp), dimension(num_vels) :: vel_K + real(wp), dimension(num_species) :: Y_K +#:endif + real(wp) :: rho_K real(wp) :: vel_K_sum real(wp) :: pres_K real(wp) :: E_K @@ -1187,7 +1210,6 @@ contains real(wp) :: qv_K real(wp), dimension(2) :: Re_K real(wp) :: G_K - real(wp), dimension(num_species) :: Y_K real(wp) :: T_K, mix_mol_weight, R_gas integer :: i, j, k, l !< Generic loop iterators @@ -1315,8 +1337,13 @@ contains & parallelism='[seq]', cray_inline=True) type(scalar_field), dimension(sys_size), intent(in) :: q_vf integer, intent(in) :: k, l, r +#:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(3), intent(out) :: alpha_rho_K, alpha_K +#:else real(wp), dimension(num_fluids), intent(out) :: alpha_rho_K, alpha_K +#:endif integer :: i + real(wp) :: alpha_K_sum if (num_fluids == 1) then alpha_rho_K(1) = q_vf(contxb)%sf(k, l, r) @@ -1342,11 +1369,13 @@ contains end if if (mpp_lim) then + alpha_K_sum = 0._wp do i = 1, num_fluids alpha_rho_K(i) = max(0._wp, alpha_rho_K(i)) alpha_K(i) = min(max(0._wp, alpha_K(i)), 1._wp) + alpha_K_sum = alpha_K_sum + alpha_K(i) end do - alpha_K = alpha_K/max(sum(alpha_K), 1.e-16_wp) + alpha_K = alpha_K/max(alpha_K_sum, 1.e-16_wp) end if if (num_fluids == 1 .and. bubbles_euler) alpha_K(1) = q_vf(advxb)%sf(k, l, r) @@ -1382,7 +1411,11 @@ contains real(wp), intent(in) :: pres real(wp), intent(in) :: rho, gamma, pi_inf, qv real(wp), intent(in) :: H - real(wp), dimension(num_fluids), intent(in) :: adv +#:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(3), intent(in) :: adv +#:else + real(wp), dimension(num_fluids), intent(in) :: adv +#:endif real(wp), intent(in) :: vel_sum real(wp), intent(in) :: c_c real(wp), intent(out) :: c diff --git a/src/simulation/include/inline_riemann.fpp b/src/simulation/include/inline_riemann.fpp index 3d1095324a..5b4598c0aa 100644 --- a/src/simulation/include/inline_riemann.fpp +++ b/src/simulation/include/inline_riemann.fpp @@ -40,16 +40,34 @@ eps = 0.001_wp call get_species_enthalpies_rt(T_L, h_iL) call get_species_enthalpies_rt(T_R, h_iR) - +#:if USING_AMD + h_iL = h_iL*gas_constant/molecular_weights_nonparameter*T_L + h_iR = h_iR*gas_constant/molecular_weights_nonparameter*T_R +#:else h_iL = h_iL*gas_constant/molecular_weights*T_L h_iR = h_iR*gas_constant/molecular_weights*T_R +#:endif call get_species_specific_heats_r(T_L, Cp_iL) call get_species_specific_heats_r(T_R, Cp_iR) h_avg_2 = (sqrt(rho_L)*h_iL + sqrt(rho_R)*h_iR)/(sqrt(rho_L) + sqrt(rho_R)) Yi_avg = (sqrt(rho_L)*Ys_L + sqrt(rho_R)*Ys_R)/(sqrt(rho_L) + sqrt(rho_R)) T_avg = (sqrt(rho_L)*T_L + sqrt(rho_R)*T_R)/(sqrt(rho_L) + sqrt(rho_R)) +#:if USING_AMD + if (abs(T_L - T_R) < eps) then + ! Case when T_L and T_R are very close + Cp_avg = sum(Yi_avg(:)*(0.5_wp*Cp_iL(:) + 0.5_wp*Cp_iR(:))*gas_constant/molecular_weights_nonparameter(:)) + Cv_avg = sum(Yi_avg(:)*((0.5_wp*Cp_iL(:) + 0.5_wp*Cp_iR(:))*gas_constant/molecular_weights_nonparameter(:) - gas_constant/molecular_weights_nonparameter(:))) + else + ! Normal calculation when T_L and T_R are sufficiently different + Cp_avg = sum(Yi_avg(:)*(h_iR(:) - h_iL(:))/(T_R - T_L)) + Cv_avg = sum(Yi_avg(:)*((h_iR(:) - h_iL(:))/(T_R - T_L) - gas_constant/molecular_weights_nonparameter(:))) + end if + gamma_avg = Cp_avg/Cv_avg + Phi_avg(:) = (gamma_avg - 1._wp)*(vel_avg_rms/2.0_wp - h_avg_2(:)) + gamma_avg*gas_constant/molecular_weights_nonparameter(:)*T_avg + c_sum_Yi_Phi = sum(Yi_avg(:)*Phi_avg(:)) +#:else if (abs(T_L - T_R) < eps) then ! Case when T_L and T_R are very close Cp_avg = sum(Yi_avg(:)*(0.5_wp*Cp_iL(:) + 0.5_wp*Cp_iR(:))*gas_constant/molecular_weights(:)) @@ -59,11 +77,13 @@ Cp_avg = sum(Yi_avg(:)*(h_iR(:) - h_iL(:))/(T_R - T_L)) Cv_avg = sum(Yi_avg(:)*((h_iR(:) - h_iL(:))/(T_R - T_L) - gas_constant/molecular_weights(:))) end if - gamma_avg = Cp_avg/Cv_avg Phi_avg(:) = (gamma_avg - 1._wp)*(vel_avg_rms/2.0_wp - h_avg_2(:)) + gamma_avg*gas_constant/molecular_weights(:)*T_avg c_sum_Yi_Phi = sum(Yi_avg(:)*Phi_avg(:)) +#:endif + + end if #:enddef roe_avg diff --git a/src/simulation/m_acoustic_src.fpp b/src/simulation/m_acoustic_src.fpp index 15864f1e2d..458f8dd514 100644 --- a/src/simulation/m_acoustic_src.fpp +++ b/src/simulation/m_acoustic_src.fpp @@ -143,8 +143,11 @@ contains type(scalar_field), dimension(sys_size), intent(inout) :: rhs_vf integer, intent(in) :: t_step - +#:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(3) :: myalpha, myalpha_rho +#:else real(wp), dimension(num_fluids) :: myalpha, myalpha_rho +#:endif real(wp) :: myRho, B_tait real(wp) :: sim_time, c, small_gamma real(wp) :: frequency_local, gauss_sigma_time_local diff --git a/src/simulation/m_bubbles_EE.fpp b/src/simulation/m_bubbles_EE.fpp index ec45d58fe9..6fda2da3b1 100644 --- a/src/simulation/m_bubbles_EE.fpp +++ b/src/simulation/m_bubbles_EE.fpp @@ -165,9 +165,14 @@ contains real(wp) :: rddot real(wp) :: pb_local, mv_local, vflux, pbdot real(wp) :: n_tait, B_tait +#:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(3) :: Rtmp, Vtmp + real(wp), dimension(3) :: myalpha, myalpha_rho +#:else real(wp), dimension(nb) :: Rtmp, Vtmp - real(wp) :: myR, myV, alf, myP, myRho, R2Vav, R3 real(wp), dimension(num_fluids) :: myalpha, myalpha_rho +#:endif + real(wp) :: myR, myV, alf, myP, myRho, R2Vav, R3 real(wp) :: nbub !< Bubble number density real(wp) :: my_divu diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index 6f555fd59b..2f9937f140 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -537,7 +537,11 @@ contains real(wp) :: myR, myV, myBeta_c, myBeta_t, myR0, myPbdot, myMvdot real(wp) :: myPinf, aux1, aux2, myCson, myRho real(wp) :: gamma, pi_inf, qv +#:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(3) :: myalpha_rho, myalpha +#:else real(wp), dimension(contxe) :: myalpha_rho, myalpha +#:endif real(wp), dimension(2) :: Re integer, dimension(3) :: cell @@ -551,7 +555,7 @@ contains ! Subgrid p_inf model based on Maeda and Colonius (2018). if (lag_params%pressure_corrector) then ! Calculate velocity potentials (valid for one bubble per cell) - $:GPU_PARALLEL_LOOP(private='[k,cell]') + $:GPU_PARALLEL_LOOP(private='[k,cell,paux,preterm1,term2,Romega,myR0,myR,myV,myPb,pint,term1_fac]') do k = 1, nBubs call s_get_pinf(k, q_prim_vf, 2, paux, cell, preterm1, term2, Romega) myR0 = bub_R0(k) @@ -573,7 +577,7 @@ contains ! Radial motion model adap_dt_stop_max = 0 - $:GPU_PARALLEL_LOOP(private='[k,i,myalpha_rho,myalpha,Re,cell]', & + $:GPU_PARALLEL_LOOP(private='[k,i,myalpha_rho,myalpha,myVapFlux,preterm1, term2, paux, pint, Romega, term1_fac,myR_m, mygamma_m, myPb, myMass_n, myMass_v,myR, myV, myBeta_c, myBeta_t, myR0, myPbdot, myMvdot,myPinf, aux1, aux2, myCson, myRho,gamma,pi_inf,qv,dmalf, dmntait, dmBtait, dm_bub_adv_src, dm_divu,adap_dt_stop]', & & reduction='[[adap_dt_stop_max]]',reductionOp='[MAX]', & & copy='[adap_dt_stop_max]',copyin='[stage]') do k = 1, nBubs @@ -778,7 +782,11 @@ contains real(wp), intent(out) :: cson real(wp) :: E, H +#:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(3) :: vel +#:else real(wp), dimension(num_dims) :: vel +#:endif integer :: i $:GPU_LOOP(parallelism='[seq]') diff --git a/src/simulation/m_cbc.fpp b/src/simulation/m_cbc.fpp index 0950072042..0f3f0011b7 100644 --- a/src/simulation/m_cbc.fpp +++ b/src/simulation/m_cbc.fpp @@ -644,25 +644,32 @@ contains integer, intent(in) :: cbc_dir_norm, cbc_loc_norm type(int_bounds_info), intent(in) :: ix, iy, iz - - ! First-order time derivatives of the partial densities, density, - ! velocity, pressure, advection variables, and the specific heat - ! ratio and liquid stiffness functions - - real(wp), dimension(num_fluids) :: dalpha_rho_dt real(wp) :: drho_dt - real(wp), dimension(num_dims) :: dvel_dt real(wp) :: dpres_dt - real(wp), dimension(num_fluids) :: dadv_dt real(wp) :: dgamma_dt real(wp) :: dpi_inf_dt real(wp) :: dqv_dt real(wp) :: dpres_ds +#:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(3) :: alpha_rho, dalpha_rho_ds, mf + real(wp), dimension(3) :: vel, dvel_ds + real(wp), dimension(3) :: adv_local, dadv_ds + real(wp), dimension(12) :: L + real(wp), dimension(3) :: dadv_dt + real(wp), dimension(3) :: dvel_dt + real(wp), dimension(3) :: dalpha_rho_dt + real(wp), dimension(10) :: Ys, h_k, dYs_dt, dYs_ds, Xs, Gamma_i, Cp_i +#:else real(wp), dimension(contxe) :: alpha_rho, dalpha_rho_ds, mf - real(wp), dimension(2) :: Re_cbc real(wp), dimension(num_vels) :: vel, dvel_ds real(wp), dimension(num_fluids) :: adv_local, dadv_ds real(wp), dimension(sys_size) :: L + real(wp), dimension(num_fluids) :: dadv_dt + real(wp), dimension(num_dims) :: dvel_dt + real(wp), dimension(num_fluids) :: dalpha_rho_dt + real(wp), dimension(num_species) :: Ys, h_k, dYs_dt, dYs_ds, Xs, Gamma_i, Cp_i +#:endif + real(wp), dimension(2) :: Re_cbc real(wp), dimension(3) :: lambda real(wp) :: rho !< Cell averaged density @@ -676,7 +683,7 @@ contains real(wp) :: Ma real(wp) :: T, sum_Enthalpies real(wp) :: Cv, Cp, e_mix, Mw, R_gas - real(wp), dimension(num_species) :: Ys, h_k, dYs_dt, dYs_ds, Xs, Gamma_i, Cp_i + real(wp) :: vel_K_sum, vel_dv_dt_sum diff --git a/src/simulation/m_compute_cbc.fpp b/src/simulation/m_compute_cbc.fpp index 594924e055..aabd7d726d 100644 --- a/src/simulation/m_compute_cbc.fpp +++ b/src/simulation/m_compute_cbc.fpp @@ -1,7 +1,7 @@ !> !! @file m_compute_cbc.f90 !! @brief CBC computation module - +#:include 'case.fpp' #:include 'macros.fpp' module m_compute_cbc @@ -23,7 +23,11 @@ contains $:GPU_ROUTINE(parallelism='[seq]') real(wp), dimension(3), intent(in) :: lambda real(wp), intent(in) :: rho, c, dpres_ds - real(wp), dimension(num_dims), intent(in) :: dvel_ds +#:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(3), intent(in) :: dvel_ds +#:else + real(wp), dimension(num_dims), intent(in) :: dvel_ds +#:endif real(wp) :: L1 L1 = lambda(1)*(dpres_ds - rho*c*dvel_ds(dir_idx(1))) end function f_base_L1 @@ -31,9 +35,14 @@ contains !> Fill density L variables subroutine s_fill_density_L(L, lambda_factor, lambda2, c, mf, dalpha_rho_ds, dpres_ds) $:GPU_ROUTINE(parallelism='[seq]') +#:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(12), intent(inout) :: L + real(wp), dimension(3), intent(in) :: mf, dalpha_rho_ds +#:else real(wp), dimension(sys_size), intent(inout) :: L - real(wp), intent(in) :: lambda_factor, lambda2, c real(wp), dimension(num_fluids), intent(in) :: mf, dalpha_rho_ds +#:endif + real(wp), intent(in) :: lambda_factor, lambda2, c real(wp), intent(in) :: dpres_ds integer :: i @@ -46,9 +55,14 @@ contains !> Fill velocity L variables subroutine s_fill_velocity_L(L, lambda_factor, lambda2, dvel_ds) $:GPU_ROUTINE(parallelism='[seq]') +#:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(12), intent(inout) :: L + real(wp), dimension(3), intent(in) :: dvel_ds +#:else real(wp), dimension(sys_size), intent(inout) :: L - real(wp), intent(in) :: lambda_factor, lambda2 real(wp), dimension(num_dims), intent(in) :: dvel_ds +#:endif + real(wp), intent(in) :: lambda_factor, lambda2 integer :: i ! $:GPU_LOOP(parallelism='[seq]') @@ -60,9 +74,14 @@ contains !> Fill advection L variables subroutine s_fill_advection_L(L, lambda_factor, lambda2, dadv_ds) $:GPU_ROUTINE(parallelism='[seq]') +#:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(12), intent(inout) :: L + real(wp), dimension(3), intent(in) :: dadv_ds +#:else real(wp), dimension(sys_size), intent(inout) :: L - real(wp), intent(in) :: lambda_factor, lambda2 real(wp), dimension(num_fluids), intent(in) :: dadv_ds +#:endif + real(wp), intent(in) :: lambda_factor, lambda2 integer :: i ! $:GPU_LOOP(parallelism='[seq]') @@ -74,9 +93,14 @@ contains !> Fill chemistry L variables subroutine s_fill_chemistry_L(L, lambda_factor, lambda2, dYs_ds) $:GPU_ROUTINE(parallelism='[seq]') +#:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(12), intent(inout) :: L + real(wp), dimension(10), intent(in) :: dYs_ds +#:else real(wp), dimension(sys_size), intent(inout) :: L - real(wp), intent(in) :: lambda_factor, lambda2 real(wp), dimension(num_species), intent(in) :: dYs_ds +#:endif + real(wp), intent(in) :: lambda_factor, lambda2 integer :: i if (.not. chemistry) return @@ -93,9 +117,14 @@ contains & cray_inline=True) real(wp), dimension(3), intent(in) :: lambda +#:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(12), intent(inout) :: L + real(wp), dimension(3), intent(in) :: dvel_ds +#:else real(wp), dimension(sys_size), intent(inout) :: L - real(wp), intent(in) :: rho, c, dpres_ds real(wp), dimension(num_dims), intent(in) :: dvel_ds +#:endif + real(wp), intent(in) :: rho, c, dpres_ds integer :: i L(1) = f_base_L1(lambda, rho, c, dpres_ds, dvel_ds) @@ -109,13 +138,22 @@ contains & parallelism='[seq]', cray_inline=True) real(wp), dimension(3), intent(in) :: lambda +#:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(12), intent(inout) :: L + real(wp), dimension(3), intent(in) :: mf, dalpha_rho_ds + real(wp), dimension(3), intent(in) :: dvel_ds + real(wp), dimension(3), intent(in) :: dadv_ds + real(wp), dimension(10), intent(in) :: dYs_ds +#:else real(wp), dimension(sys_size), intent(inout) :: L - real(wp), intent(in) :: rho, c real(wp), dimension(num_fluids), intent(in) :: mf, dalpha_rho_ds - real(wp), intent(in) :: dpres_ds real(wp), dimension(num_dims), intent(in) :: dvel_ds real(wp), dimension(num_fluids), intent(in) :: dadv_ds real(wp), dimension(num_species), intent(in) :: dYs_ds +#:endif + real(wp), intent(in) :: rho, c + real(wp), intent(in) :: dpres_ds + real(wp) :: lambda_factor lambda_factor = (5.e-1_wp - 5.e-1_wp*sign(1._wp, lambda(1))) @@ -137,9 +175,15 @@ contains & parallelism='[seq]', cray_inline=True) real(wp), dimension(3), intent(in) :: lambda +#:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(12), intent(inout) :: L + real(wp), dimension(3), intent(in) :: dvel_ds +#:else real(wp), dimension(sys_size), intent(inout) :: L + real(wp), dimension(num_dims), intent(in) :: dvel_ds +#:endif real(wp), intent(in) :: rho, c, dpres_ds - real(wp), dimension(num_dims), intent(in) :: dvel_ds + L(1) = f_base_L1(lambda, rho, c, dpres_ds, dvel_ds) L(2:advxe) = 0._wp @@ -152,13 +196,21 @@ contains & parallelism='[seq]', cray_inline=True) real(wp), dimension(3), intent(in) :: lambda +#:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(12), intent(inout) :: L + real(wp), dimension(3), intent(in) :: mf, dalpha_rho_ds + real(wp), dimension(3), intent(in) :: dvel_ds + real(wp), dimension(3), intent(in) :: dadv_ds + real(wp), dimension(10), intent(in) :: dYs_ds +#:else real(wp), dimension(sys_size), intent(inout) :: L - real(wp), intent(in) :: rho, c real(wp), dimension(num_fluids), intent(in) :: mf, dalpha_rho_ds - real(wp), intent(in) :: dpres_ds real(wp), dimension(num_dims), intent(in) :: dvel_ds real(wp), dimension(num_fluids), intent(in) :: dadv_ds real(wp), dimension(num_species), intent(in) :: dYs_ds +#:endif + real(wp), intent(in) :: rho, c + real(wp), intent(in) :: dpres_ds L(1) = f_base_L1(lambda, rho, c, dpres_ds, dvel_ds) call s_fill_density_L(L, 1._wp, lambda(2), c, mf, dalpha_rho_ds, dpres_ds) @@ -174,12 +226,19 @@ contains & parallelism='[seq]', cray_inline=True) real(wp), dimension(3), intent(in) :: lambda +#:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(12), intent(inout) :: L + real(wp), dimension(3), intent(in) :: mf, dalpha_rho_ds + real(wp), dimension(3), intent(in) :: dvel_ds + real(wp), dimension(3), intent(in) :: dadv_ds +#:else real(wp), dimension(sys_size), intent(inout) :: L - real(wp), intent(in) :: rho, c real(wp), dimension(num_fluids), intent(in) :: mf, dalpha_rho_ds - real(wp), intent(in) :: dpres_ds real(wp), dimension(num_dims), intent(in) :: dvel_ds real(wp), dimension(num_fluids), intent(in) :: dadv_ds +#:endif + real(wp), intent(in) :: rho, c + real(wp), intent(in) :: dpres_ds L(1) = f_base_L1(lambda, rho, c, dpres_ds, dvel_ds) call s_fill_density_L(L, 1._wp, lambda(2), c, mf, dalpha_rho_ds, dpres_ds) @@ -194,12 +253,20 @@ contains & parallelism='[seq]', cray_inline=True) real(wp), dimension(3), intent(in) :: lambda +#:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(12), intent(inout) :: L + real(wp), dimension(3), intent(in) :: mf, dalpha_rho_ds + real(wp), dimension(3), intent(in) :: dvel_ds + real(wp), dimension(3), intent(in) :: dadv_ds +#:else real(wp), dimension(sys_size), intent(inout) :: L - real(wp), intent(in) :: rho, c real(wp), dimension(num_fluids), intent(in) :: mf, dalpha_rho_ds - real(wp), intent(in) :: dpres_ds real(wp), dimension(num_dims), intent(in) :: dvel_ds real(wp), dimension(num_fluids), intent(in) :: dadv_ds +#:endif + real(wp), intent(in) :: rho, c + real(wp), intent(in) :: dpres_ds + L(1) = f_base_L1(lambda, rho, c, dpres_ds, dvel_ds) call s_fill_density_L(L, 1._wp, lambda(2), c, mf, dalpha_rho_ds, dpres_ds) @@ -212,8 +279,11 @@ contains subroutine s_compute_supersonic_inflow_L(L) $:GPU_ROUTINE(function_name='s_compute_supersonic_inflow_L', & & parallelism='[seq]', cray_inline=True) - +#:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(12), intent(inout) :: L +#:else real(wp), dimension(sys_size), intent(inout) :: L +#:endif L(1:advxe) = 0._wp if (chemistry) L(chemxb:chemxe) = 0._wp end subroutine s_compute_supersonic_inflow_L @@ -224,13 +294,21 @@ contains & parallelism='[seq]', cray_inline=True) real(wp), dimension(3), intent(in) :: lambda +#:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(12), intent(inout) :: L + real(wp), dimension(3), intent(in) :: mf, dalpha_rho_ds + real(wp), dimension(3), intent(in) :: dvel_ds + real(wp), dimension(3), intent(in) :: dadv_ds + real(wp), dimension(10), intent(in) :: dYs_ds +#:else real(wp), dimension(sys_size), intent(inout) :: L - real(wp), intent(in) :: rho, c real(wp), dimension(num_fluids), intent(in) :: mf, dalpha_rho_ds - real(wp), intent(in) :: dpres_ds real(wp), dimension(num_dims), intent(in) :: dvel_ds real(wp), dimension(num_fluids), intent(in) :: dadv_ds real(wp), dimension(num_species), intent(in) :: dYs_ds +#:endif + real(wp), intent(in) :: rho, c + real(wp), intent(in) :: dpres_ds L(1) = f_base_L1(lambda, rho, c, dpres_ds, dvel_ds) call s_fill_density_L(L, 1._wp, lambda(2), c, mf, dalpha_rho_ds, dpres_ds) diff --git a/src/simulation/m_data_output.fpp b/src/simulation/m_data_output.fpp index d239532f43..97c1ce5a4d 100644 --- a/src/simulation/m_data_output.fpp +++ b/src/simulation/m_data_output.fpp @@ -269,7 +269,7 @@ contains real(wp) :: rho !< Cell-avg. density #:if not MFC_CASE_OPTIMIZATION and USING_AMD - real(wp), dimension(2) :: alpha !< Cell-avg. volume fraction + real(wp), dimension(3) :: alpha !< Cell-avg. volume fraction real(wp), dimension(3) :: vel !< Cell-avg. velocity #:else real(wp), dimension(num_fluids) :: alpha !< Cell-avg. volume fraction diff --git a/src/simulation/m_hyperelastic.fpp b/src/simulation/m_hyperelastic.fpp index 76e0c74841..599ef3302f 100644 --- a/src/simulation/m_hyperelastic.fpp +++ b/src/simulation/m_hyperelastic.fpp @@ -98,9 +98,13 @@ contains type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf - +#:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(9) :: tensora, tensorb + real(wp), dimension(3) :: alpha_k, alpha_rho_k +#:else real(wp), dimension(tensor_size) :: tensora, tensorb real(wp), dimension(num_fluids) :: alpha_k, alpha_rho_k +#:endif real(wp), dimension(2) :: Re real(wp) :: rho, gamma, pi_inf, qv real(wp) :: G_local diff --git a/src/simulation/m_ibm.fpp b/src/simulation/m_ibm.fpp index 2c5d133060..1d4d712226 100644 --- a/src/simulation/m_ibm.fpp +++ b/src/simulation/m_ibm.fpp @@ -175,15 +175,23 @@ contains real(wp), dimension(2) :: Re_K real(wp) :: G_K real(wp) :: qv_K - real(wp), dimension(num_fluids) :: Gs real(wp) :: pres_IP real(wp), dimension(3) :: vel_IP, vel_norm_IP real(wp) :: c_IP +#:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(3) :: Gs + real(wp), dimension(3) :: alpha_rho_IP, alpha_IP + real(wp), dimension(3) :: r_IP, v_IP, pb_IP, mv_IP + real(wp), dimension(18) :: nmom_IP + real(wp), dimension(12) :: presb_IP, massv_IP +#:else + real(wp), dimension(num_fluids) :: Gs real(wp), dimension(num_fluids) :: alpha_rho_IP, alpha_IP real(wp), dimension(nb) :: r_IP, v_IP, pb_IP, mv_IP real(wp), dimension(nb*nmom) :: nmom_IP real(wp), dimension(nb*nnode) :: presb_IP, massv_IP +#:endif !! Primitive variables at the image point associated with a ghost point, !! interpolated from surrounding fluid cells. @@ -854,7 +862,11 @@ contains real(wp), intent(INOUT) :: pres_IP real(wp), dimension(3), intent(INOUT) :: vel_IP real(wp), intent(INOUT) :: c_IP +#:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(3), intent(INOUT) :: alpha_IP, alpha_rho_IP +#:else real(wp), dimension(num_fluids), intent(INOUT) :: alpha_IP, alpha_rho_IP +#:endif real(wp), optional, dimension(:), intent(INOUT) :: r_IP, v_IP, pb_IP, mv_IP real(wp), optional, dimension(:), intent(INOUT) :: nmom_IP real(wp), optional, dimension(:), intent(INOUT) :: presb_IP, massv_IP diff --git a/src/simulation/m_igr.fpp b/src/simulation/m_igr.fpp index e99c9a6401..7e27a51de9 100644 --- a/src/simulation/m_igr.fpp +++ b/src/simulation/m_igr.fpp @@ -48,9 +48,6 @@ module m_igr integer :: vidxb, vidxe $:GPU_DECLARE(create='[vidxb, vidxe]') - integer :: offxL, offxR - $:GPU_DECLARE(create='[offxL, offxR]') - real(wp), allocatable, dimension(:) :: coeff_L, coeff_R $:GPU_DECLARE(create='[coeff_L, coeff_R]') #:else @@ -58,69 +55,65 @@ module m_igr integer, parameter :: vidxb = -2 integer, parameter :: vidxe = 3 - integer, parameter :: offxL = 2 - integer, parameter :: offxR = 3 #if defined(MFC_OpenMP) - real(wp) :: coeff_L(1:5) = [ & - -3._wp/60._wp, & ! Index -1 - 27._wp/60._wp, & ! Index 0 - 47._wp/60._wp, & ! Index 1 - -13._wp/60._wp, & ! Index 2 - 2._wp/60._wp & ! Index 3 - ] - - real(wp) :: coeff_R(1:5) = [ & - 2._wp/60._wp, & ! Index -2 - -13._wp/60._wp, & ! Index -1 - 47._wp/60._wp, & ! Index 0 - 27._wp/60._wp, & ! Index 1 - -3._wp/60._wp & ! Index 2 - ] + real(wp) :: coeff_L(-1:3) = [ & + -3._wp/60._wp, & ! Index -1 + 27._wp/60._wp, & ! Index 0 + 47._wp/60._wp, & ! Index 1 + -13._wp/60._wp, & ! Index 2 + 2._wp/60._wp & ! Index 3 + ] + + real(wp) :: coeff_R(-2:2) = [ & + 2._wp/60._wp, & ! Index -2 + -13._wp/60._wp, & ! Index -1 + 47._wp/60._wp, & ! Index 0 + 27._wp/60._wp, & ! Index 1 + -3._wp/60._wp & ! Index 2 + ] #else - real(wp), parameter :: coeff_L(1:5) = [ & - -3._wp/60._wp, & ! Index -1 - 27._wp/60._wp, & ! Index 0 - 47._wp/60._wp, & ! Index 1 - -13._wp/60._wp, & ! Index 2 - 2._wp/60._wp & ! Index 3 - ] - - real(wp), parameter :: coeff_R(1:5) = [ & - 2._wp/60._wp, & ! Index -2 - -13._wp/60._wp, & ! Index -1 - 47._wp/60._wp, & ! Index 0 - 27._wp/60._wp, & ! Index 1 - -3._wp/60._wp & ! Index 2 - ] + real(wp), parameter :: coeff_L(-1:3) = [ & + -3._wp/60._wp, & ! Index -1 + 27._wp/60._wp, & ! Index 0 + 47._wp/60._wp, & ! Index 1 + -13._wp/60._wp, & ! Index 2 + 2._wp/60._wp & ! Index 3 + ] + + real(wp), parameter :: coeff_R(-2:2) = [ & + 2._wp/60._wp, & ! Index -2 + -13._wp/60._wp, & ! Index -1 + 47._wp/60._wp, & ! Index 0 + 27._wp/60._wp, & ! Index 1 + -3._wp/60._wp & ! Index 2 + ] #endif #:elif igr_order == 3 integer, parameter :: vidxb = -1 integer, parameter :: vidxe = 2 - integer, parameter :: offxL = 1 - integer, parameter :: offxR = 2 #if defined(MFC_OpenMP) - real(wp) :: coeff_L(1:3) = [ & - 2._wp/6._wp, & ! Index 0 - 5._wp/6._wp, & ! Index 1 - -1._wp/6._wp & ! Index 2 - ] - real(wp) :: coeff_R(1:3) = [ & - -1._wp/6._wp, & ! Index -1 - 5._wp/6._wp, & ! Index 0 - 2._wp/6._wp & ! Index 1 - ] + real(wp) :: coeff_L(0:2) = [ & + 2._wp/6._wp, & ! Index 0 + 5._wp/6._wp, & ! Index 1 + -1._wp/6._wp & ! Index 2 + ] + real(wp) :: coeff_R(-1:1) = [ & + -1._wp/6._wp, & ! Index -1 + 5._wp/6._wp, & ! Index 0 + 2._wp/6._wp & ! Index 1 + ] #else - real(wp), parameter :: coeff_L(1:3) = [ & - 2._wp/6._wp, & ! Index 0 - 5._wp/6._wp, & ! Index 1 - -1._wp/6._wp & ! Index 2 - ] - real(wp), parameter :: coeff_R(1:3) = [ & - -1._wp/6._wp, & ! Index -1 - 5._wp/6._wp, & ! Index 0 - 2._wp/6._wp & ! Index 1 - ] + real(wp), parameter :: coeff_L(0:2) = [ & + 2._wp/6._wp, & ! Index 0 + 5._wp/6._wp, & ! Index 1 + -1._wp/6._wp & ! Index 2 + ] + real(wp), parameter :: coeff_R(-1:1) = [ & + -1._wp/6._wp, & ! Index -1 + 5._wp/6._wp, & ! Index 0 + 2._wp/6._wp & ! Index 1 + ] #endif #:endif @@ -224,36 +217,37 @@ contains #:if not MFC_CASE_OPTIMIZATION if (igr_order == 3) then - vidxb = -1; vidxe = 2; offxL = 1; offxR = 2 - $:GPU_UPDATE(device='[vidxb, vidxe, offxL, offxR]') + vidxb = -1; vidxe = 2; + $:GPU_UPDATE(device='[vidxb, vidxe]') + + @:ALLOCATE(coeff_L(0:2)) + coeff_L(0) = (2._wp/6._wp) + coeff_L(1) = (5._wp/6._wp) + coeff_L(2) = (-1._wp/6._wp) - @:ALLOCATE(coeff_L(1:3)) - coeff_L(1) = (2._wp/6._wp) - coeff_L(2) = (5._wp/6._wp) - coeff_L(3) = (-1._wp/6._wp) + @:ALLOCATE(coeff_R(-1:1)) + coeff_R(1) = (2._wp/6._wp) + coeff_R(0) = (5._wp/6._wp) + coeff_R(-1) = (-1._wp/6._wp) - @:ALLOCATE(coeff_R(1:3)) - coeff_R(3) = (2._wp/6._wp) - coeff_R(2) = (5._wp/6._wp) - coeff_R(1) = (-1._wp/6._wp) elseif (igr_order == 5) then - vidxb = -2; vidxe = 3; offxL = 2; offxR = 3 - $:GPU_UPDATE(device='[vidxb, vidxe, offxL, offxR]') - - @:ALLOCATE(coeff_L(1:5)) - coeff_L(1) = (-3._wp/60._wp) - coeff_L(2) = (27._wp/60._wp) - coeff_L(3) = (47._wp/60._wp) - coeff_L(4) = (-13._wp/60._wp) - coeff_L(5) = (2._wp/60._wp) - - @:ALLOCATE(coeff_R(1:5)) - coeff_R(5) = (-3._wp/60._wp) - coeff_R(4) = (27._wp/60._wp) - coeff_R(3) = (47._wp/60._wp) - coeff_R(2) = (-13._wp/60._wp) - coeff_R(1) = (2._wp/60._wp) + vidxb = -2; vidxe = 3; + $:GPU_UPDATE(device='[vidxb, vidxe]') + + @:ALLOCATE(coeff_L(-1:3)) + coeff_L(-1) = (-3._wp/60._wp) + coeff_L(0) = (27._wp/60._wp) + coeff_L(1) = (47._wp/60._wp) + coeff_L(2) = (-13._wp/60._wp) + coeff_L(3) = (2._wp/60._wp) + + @:ALLOCATE(coeff_R(-2:2)) + coeff_R(2) = (-3._wp/60._wp) + coeff_R(1) = (27._wp/60._wp) + coeff_R(0) = (47._wp/60._wp) + coeff_R(-1) = (-13._wp/60._wp) + coeff_R(-2) = (2._wp/60._wp) end if $:GPU_UPDATE(device='[coeff_L]') @@ -408,22 +402,22 @@ contains do q = vidxb + 1, vidxe $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - alpha_rho_L(i) = alpha_rho_L(i) + coeff_L(q + offxL)*q_cons_vf(i)%sf(j + q, k, l) + alpha_rho_L(i) = alpha_rho_L(i) + coeff_L(q)*q_cons_vf(i)%sf(j + q, k, l) end do - vel_L = vel_L + coeff_L(q + offxL)*q_cons_vf(momxb)%sf(j + q, k, l) - F_L = F_L + coeff_L(q + offxL)*jac(j + q, k, l) + vel_L = vel_L + coeff_L(q)*q_cons_vf(momxb)%sf(j + q, k, l) + F_L = F_L + coeff_L(q)*jac(j + q, k, l) end do $:GPU_LOOP(parallelism='[seq]') do q = vidxb, vidxe - 1 $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - alpha_rho_R(i) = alpha_rho_R(i) + coeff_R(q + offxR)*q_cons_vf(i)%sf(j + q, k, l) + alpha_rho_R(i) = alpha_rho_R(i) + coeff_R(q)*q_cons_vf(i)%sf(j + q, k, l) end do - vel_R = vel_R + coeff_R(q + offxR)*q_cons_vf(momxb)%sf(j + q, k, l) - F_R = F_R + coeff_R(q + offxR)*jac(j + q, k, l) + vel_R = vel_R + coeff_R(q)*q_cons_vf(momxb)%sf(j + q, k, l) + F_R = F_R + coeff_R(q)*jac(j + q, k, l) end do $:GPU_LOOP(parallelism='[seq]') @@ -531,12 +525,12 @@ contains end if if (q > vidxb) then - vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q + offxL)*(dvel_small(2)) - vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q + offxL)*(4._wp*dvel_small(1))/3._wp + vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q)*(dvel_small(2)) + vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(4._wp*dvel_small(1))/3._wp end if if (q < vidxe) then - vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q + offxR)*(dvel_small(2)) - vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q + offxR)*(4._wp*dvel_small(1))/3._wp + vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q)*(dvel_small(2)) + vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(4._wp*dvel_small(1))/3._wp end if !y-direction contributions @@ -565,12 +559,12 @@ contains end if if (q > vidxb) then - vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q + offxL)*(dvel_small(1)) - vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q + offxL)*(-2._wp*dvel_small(2))/3._wp + vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q)*(dvel_small(1)) + vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(-2._wp*dvel_small(2))/3._wp end if if (q < vidxe) then - vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q + offxR)*(dvel_small(1)) - vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q + offxR)*(-2._wp*dvel_small(2))/3._wp + vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q)*(dvel_small(1)) + vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(-2._wp*dvel_small(2))/3._wp end if if (q == 0) then @@ -597,13 +591,13 @@ contains do q = vidxb + 1, vidxe $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - alpha_rho_L(i) = alpha_rho_L(i) + coeff_L(q + offxL)*q_cons_vf(i)%sf(j + q, k, l) + alpha_rho_L(i) = alpha_rho_L(i) + coeff_L(q)*q_cons_vf(i)%sf(j + q, k, l) end do if (num_fluids > 1) then $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - 1 - alpha_L(i) = alpha_L(i) + coeff_L(q + offxL)*q_cons_vf(E_idx + i)%sf(j + q, k, l) + alpha_L(i) = alpha_L(i) + coeff_L(q)*q_cons_vf(E_idx + i)%sf(j + q, k, l) end do else alpha_L(1) = 1._wp @@ -611,7 +605,7 @@ contains $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims - vel_L(i) = vel_L(i) + coeff_L(q + offxL)*q_cons_vf(momxb + i - 1)%sf(j + q, k, l) + vel_L(i) = vel_L(i) + coeff_L(q)*q_cons_vf(momxb + i - 1)%sf(j + q, k, l) end do end do @@ -619,13 +613,13 @@ contains do q = vidxb, vidxe - 1 $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - alpha_rho_R(i) = alpha_rho_R(i) + coeff_R(q + offxR)*q_cons_vf(i)%sf(j + q, k, l) + alpha_rho_R(i) = alpha_rho_R(i) + coeff_R(q)*q_cons_vf(i)%sf(j + q, k, l) end do if (num_fluids > 1) then $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - 1 - alpha_R(i) = alpha_R(i) + coeff_R(q + offxR)*q_cons_vf(E_idx + i)%sf(j + q, k, l) + alpha_R(i) = alpha_R(i) + coeff_R(q)*q_cons_vf(E_idx + i)%sf(j + q, k, l) end do else alpha_R(1) = 1._wp @@ -633,7 +627,7 @@ contains $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims - vel_R(i) = vel_R(i) + coeff_R(q + offxR)*q_cons_vf(momxb + i - 1)%sf(j + q, k, l) + vel_R(i) = vel_R(i) + coeff_R(q)*q_cons_vf(momxb + i - 1)%sf(j + q, k, l) end do end do @@ -739,12 +733,12 @@ contains $:GPU_LOOP(parallelism='[seq]') do q = vidxb + 1, vidxe - E_L = E_L + coeff_L(q + offxL)*q_cons_vf(E_idx)%sf(j + q, k, l) + E_L = E_L + coeff_L(q)*q_cons_vf(E_idx)%sf(j + q, k, l) end do $:GPU_LOOP(parallelism='[seq]') do q = vidxb, vidxe - 1 - E_R = E_R + coeff_R(q + offxR)*q_cons_vf(E_idx)%sf(j + q, k, l) + E_R = E_R + coeff_R(q)*q_cons_vf(E_idx)%sf(j + q, k, l) end do call s_get_derived_states(E_L, gamma_L, pi_inf_L, rho_L, vel_L, & @@ -952,14 +946,14 @@ contains end if if (q > vidxb) then - vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q + offxL)*(dvel_small(2)) - vflux_L_arr(2) = vflux_L_arr(2) + coeff_L(q + offxL)*(dvel_small(3)) - vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q + offxL)*(4._wp*dvel_small(1))/3._wp + vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q)*(dvel_small(2)) + vflux_L_arr(2) = vflux_L_arr(2) + coeff_L(q)*(dvel_small(3)) + vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(4._wp*dvel_small(1))/3._wp end if if (q < vidxe) then - vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q + offxR)*(dvel_small(2)) - vflux_R_arr(2) = vflux_R_arr(2) + coeff_R(q + offxR)*(dvel_small(3)) - vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q + offxR)*(4._wp*dvel_small(1))/3._wp + vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q)*(dvel_small(2)) + vflux_R_arr(2) = vflux_R_arr(2) + coeff_R(q)*(dvel_small(3)) + vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(4._wp*dvel_small(1))/3._wp end if !y-direction contributions @@ -990,12 +984,12 @@ contains end if if (q > vidxb) then - vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q + offxL)*(dvel_small(1)) - vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q + offxL)*(-2._wp*dvel_small(2))/3._wp + vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q)*(dvel_small(1)) + vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(-2._wp*dvel_small(2))/3._wp end if if (q < vidxe) then - vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q + offxR)*(dvel_small(1)) - vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q + offxR)*(-2._wp*dvel_small(2))/3._wp + vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q)*(dvel_small(1)) + vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(-2._wp*dvel_small(2))/3._wp end if !z-direction contributions @@ -1026,12 +1020,12 @@ contains end if if (q > vidxb) then - vflux_L_arr(2) = vflux_L_arr(2) + coeff_L(q + offxL)*(dvel_small(1)) - vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q + offxL)*(-2._wp*dvel_small(3))/3._wp + vflux_L_arr(2) = vflux_L_arr(2) + coeff_L(q)*(dvel_small(1)) + vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(-2._wp*dvel_small(3))/3._wp end if if (q < vidxe) then - vflux_R_arr(2) = vflux_R_arr(2) + coeff_R(q + offxR)*(dvel_small(1)) - vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q + offxR)*(-2._wp*dvel_small(3))/3._wp + vflux_R_arr(2) = vflux_R_arr(2) + coeff_R(q)*(dvel_small(1)) + vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(-2._wp*dvel_small(3))/3._wp end if if (q == 0) then @@ -1061,13 +1055,13 @@ contains do q = vidxb + 1, vidxe $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - alpha_rho_L(i) = alpha_rho_L(i) + coeff_L(q + offxL)*q_cons_vf(i)%sf(j + q, k, l) + alpha_rho_L(i) = alpha_rho_L(i) + coeff_L(q)*q_cons_vf(i)%sf(j + q, k, l) end do if (num_fluids > 1) then $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - 1 - alpha_L(i) = alpha_L(i) + coeff_L(q + offxL)*q_cons_vf(E_idx + i)%sf(j + q, k, l) + alpha_L(i) = alpha_L(i) + coeff_L(q)*q_cons_vf(E_idx + i)%sf(j + q, k, l) end do else alpha_L(1) = 1._wp @@ -1075,7 +1069,7 @@ contains $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims - vel_L(i) = vel_L(i) + coeff_L(q + offxL)*q_cons_vf(momxb + i - 1)%sf(j + q, k, l) + vel_L(i) = vel_L(i) + coeff_L(q)*q_cons_vf(momxb + i - 1)%sf(j + q, k, l) end do end do @@ -1083,13 +1077,13 @@ contains do q = vidxb, vidxe - 1 $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - alpha_rho_R(i) = alpha_rho_R(i) + coeff_R(q + offxR)*q_cons_vf(i)%sf(j + q, k, l) + alpha_rho_R(i) = alpha_rho_R(i) + coeff_R(q)*q_cons_vf(i)%sf(j + q, k, l) end do if (num_fluids > 1) then $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - 1 - alpha_R(i) = alpha_R(i) + coeff_R(q + offxR)*q_cons_vf(E_idx + i)%sf(j + q, k, l) + alpha_R(i) = alpha_R(i) + coeff_R(q)*q_cons_vf(E_idx + i)%sf(j + q, k, l) end do else alpha_R(1) = 1._wp @@ -1097,7 +1091,7 @@ contains $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims - vel_R(i) = vel_R(i) + coeff_R(q + offxR)*q_cons_vf(momxb + i - 1)%sf(j + q, k, l) + vel_R(i) = vel_R(i) + coeff_R(q)*q_cons_vf(momxb + i - 1)%sf(j + q, k, l) end do end do @@ -1232,12 +1226,12 @@ contains $:GPU_LOOP(parallelism='[seq]') do q = vidxb + 1, vidxe - E_L = E_L + coeff_L(q + offxL)*q_cons_vf(E_idx)%sf(j + q, k, l) + E_L = E_L + coeff_L(q)*q_cons_vf(E_idx)%sf(j + q, k, l) end do $:GPU_LOOP(parallelism='[seq]') do q = vidxb, vidxe - 1 - E_R = E_R + coeff_R(q + offxR)*q_cons_vf(E_idx)%sf(j + q, k, l) + E_R = E_R + coeff_R(q)*q_cons_vf(E_idx)%sf(j + q, k, l) end do call s_get_derived_states(E_L, gamma_L, pi_inf_L, rho_L, vel_L, & @@ -1459,12 +1453,12 @@ contains q_cons_vf(momxb + 1)%sf(j - 1, k + q, l)/rho_sf_small(-1)) if (q > vidxb) then - vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q + offxL)*(dvel_small(2)) - vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q + offxL)*(-2._wp*dvel_small(1))/3._wp + vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q)*(dvel_small(2)) + vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(-2._wp*dvel_small(1))/3._wp end if if (q < vidxe) then - vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q + offxR)*(dvel_small(2)) - vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q + offxR)*(-2._wp*dvel_small(1))/3._wp + vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q)*(dvel_small(2)) + vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(-2._wp*dvel_small(1))/3._wp end if !y-direction contributions @@ -1486,12 +1480,12 @@ contains q_cons_vf(momxb + 1)%sf(j, k - 1 + q, l)/rho_sf_small(-1)) if (q > vidxb) then - vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q + offxL)*(dvel_small(1)) - vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q + offxL)*(4._wp*dvel_small(2))/3._wp + vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q)*(dvel_small(1)) + vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(4._wp*dvel_small(2))/3._wp end if if (q < vidxe) then - vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q + offxR)*(dvel_small(1)) - vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q + offxR)*(4._wp*dvel_small(2))/3._wp + vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q)*(dvel_small(1)) + vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(4._wp*dvel_small(2))/3._wp end if end do end if @@ -1513,13 +1507,13 @@ contains do q = vidxb + 1, vidxe $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - alpha_rho_L(i) = alpha_rho_L(i) + coeff_L(q + offxL)*q_cons_vf(i)%sf(j, k + q, l) + alpha_rho_L(i) = alpha_rho_L(i) + coeff_L(q)*q_cons_vf(i)%sf(j, k + q, l) end do if (num_fluids > 1) then $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - 1 - alpha_L(i) = alpha_L(i) + coeff_L(q + offxL)*q_cons_vf(E_idx + i)%sf(j, k + q, l) + alpha_L(i) = alpha_L(i) + coeff_L(q)*q_cons_vf(E_idx + i)%sf(j, k + q, l) end do else alpha_L(1) = 1._wp @@ -1527,7 +1521,7 @@ contains $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims - vel_L(i) = vel_L(i) + coeff_L(q + offxL)*q_cons_vf(momxb + i - 1)%sf(j, k + q, l) + vel_L(i) = vel_L(i) + coeff_L(q)*q_cons_vf(momxb + i - 1)%sf(j, k + q, l) end do end do @@ -1535,13 +1529,13 @@ contains do q = vidxb, vidxe - 1 $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - alpha_rho_R(i) = alpha_rho_R(i) + coeff_R(q + offxR)*q_cons_vf(i)%sf(j, k + q, l) + alpha_rho_R(i) = alpha_rho_R(i) + coeff_R(q)*q_cons_vf(i)%sf(j, k + q, l) end do if (num_fluids > 1) then $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - 1 - alpha_R(i) = alpha_R(i) + coeff_R(q + offxR)*q_cons_vf(E_idx + i)%sf(j, k + q, l) + alpha_R(i) = alpha_R(i) + coeff_R(q)*q_cons_vf(E_idx + i)%sf(j, k + q, l) end do else alpha_R(1) = 1._wp @@ -1549,7 +1543,7 @@ contains $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims - vel_R(i) = vel_R(i) + coeff_R(q + offxR)*q_cons_vf(momxb + i - 1)%sf(j, k + q, l) + vel_R(i) = vel_R(i) + coeff_R(q)*q_cons_vf(momxb + i - 1)%sf(j, k + q, l) end do end do @@ -1657,14 +1651,14 @@ contains $:GPU_LOOP(parallelism='[seq]') do q = vidxb + 1, vidxe - E_L = E_L + coeff_L(q + offxL)*q_cons_vf(E_idx)%sf(j, k + q, l) - F_L = F_L + coeff_L(q + offxL)*jac(j, k + q, l) + E_L = E_L + coeff_L(q)*q_cons_vf(E_idx)%sf(j, k + q, l) + F_L = F_L + coeff_L(q)*jac(j, k + q, l) end do $:GPU_LOOP(parallelism='[seq]') do q = vidxb, vidxe - 1 - E_R = E_R + coeff_R(q + offxR)*q_cons_vf(E_idx)%sf(j, k + q, l) - F_R = F_R + coeff_R(q + offxR)*jac(j, k + q, l) + E_R = E_R + coeff_R(q)*q_cons_vf(E_idx)%sf(j, k + q, l) + F_R = F_R + coeff_R(q)*jac(j, k + q, l) end do call s_get_derived_states(E_L, gamma_L, pi_inf_L, rho_L, vel_L, & @@ -1856,12 +1850,12 @@ contains q_cons_vf(momxb + 1)%sf(j - 1, k + q, l)/rho_sf_small(-1)) if (q > vidxb) then - vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q + offxL)*(dvel_small(2)) - vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q + offxL)*(-2._wp*dvel_small(1))/3._wp + vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q)*(dvel_small(2)) + vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(-2._wp*dvel_small(1))/3._wp end if if (q < vidxe) then - vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q + offxR)*(dvel_small(2)) - vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q + offxR)*(-2._wp*dvel_small(1))/3._wp + vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q)*(dvel_small(2)) + vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(-2._wp*dvel_small(1))/3._wp end if !y-direction contributions @@ -1886,14 +1880,14 @@ contains q_cons_vf(momxb + 2)%sf(j, k - 1 + q, l)/rho_sf_small(-1)) if (q > vidxb) then - vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q + offxL)*(dvel_small(1)) - vflux_L_arr(2) = vflux_L_arr(2) + coeff_L(q + offxL)*(dvel_small(3)) - vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q + offxL)*(4._wp*dvel_small(2))/3._wp + vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q)*(dvel_small(1)) + vflux_L_arr(2) = vflux_L_arr(2) + coeff_L(q)*(dvel_small(3)) + vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(4._wp*dvel_small(2))/3._wp end if if (q < vidxe) then - vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q + offxR)*(dvel_small(1)) - vflux_R_arr(2) = vflux_R_arr(2) + coeff_R(q + offxR)*(dvel_small(3)) - vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q + offxR)*(4._wp*dvel_small(2))/3._wp + vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q)*(dvel_small(1)) + vflux_R_arr(2) = vflux_R_arr(2) + coeff_R(q)*(dvel_small(3)) + vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(4._wp*dvel_small(2))/3._wp end if !z-direction contributions @@ -1914,12 +1908,12 @@ contains q_cons_vf(momxb + 2)%sf(j, k + q, l + 1)/rho_sf_small(1) - & q_cons_vf(momxb + 2)%sf(j, k + q, l - 1)/rho_sf_small(-1)) if (q > vidxb) then - vflux_L_arr(2) = vflux_L_arr(2) + coeff_L(q + offxL)*(dvel_small(2)) - vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q + offxL)*(-2._wp*dvel_small(3))/3._wp + vflux_L_arr(2) = vflux_L_arr(2) + coeff_L(q)*(dvel_small(2)) + vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(-2._wp*dvel_small(3))/3._wp end if if (q < vidxe) then - vflux_R_arr(2) = vflux_R_arr(2) + coeff_R(q + offxR)*(dvel_small(2)) - vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q + offxR)*(-2._wp*dvel_small(3))/3._wp + vflux_R_arr(2) = vflux_R_arr(2) + coeff_R(q)*(dvel_small(2)) + vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(-2._wp*dvel_small(3))/3._wp end if end do end if @@ -1941,13 +1935,13 @@ contains do q = vidxb + 1, vidxe $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - alpha_rho_L(i) = alpha_rho_L(i) + coeff_L(q + offxL)*q_cons_vf(i)%sf(j, k + q, l) + alpha_rho_L(i) = alpha_rho_L(i) + coeff_L(q)*q_cons_vf(i)%sf(j, k + q, l) end do if (num_fluids > 1) then $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - 1 - alpha_L(i) = alpha_L(i) + coeff_L(q + offxL)*q_cons_vf(E_idx + i)%sf(j, k + q, l) + alpha_L(i) = alpha_L(i) + coeff_L(q)*q_cons_vf(E_idx + i)%sf(j, k + q, l) end do else alpha_L(1) = 1._wp @@ -1955,7 +1949,7 @@ contains $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims - vel_L(i) = vel_L(i) + coeff_L(q + offxL)*q_cons_vf(momxb + i - 1)%sf(j, k + q, l) + vel_L(i) = vel_L(i) + coeff_L(q)*q_cons_vf(momxb + i - 1)%sf(j, k + q, l) end do end do @@ -1963,13 +1957,13 @@ contains do q = vidxb, vidxe - 1 $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - alpha_rho_R(i) = alpha_rho_R(i) + coeff_R(q + offxR)*q_cons_vf(i)%sf(j, k + q, l) + alpha_rho_R(i) = alpha_rho_R(i) + coeff_R(q)*q_cons_vf(i)%sf(j, k + q, l) end do if (num_fluids > 1) then $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - 1 - alpha_R(i) = alpha_R(i) + coeff_R(q + offxR)*q_cons_vf(E_idx + i)%sf(j, k + q, l) + alpha_R(i) = alpha_R(i) + coeff_R(q)*q_cons_vf(E_idx + i)%sf(j, k + q, l) end do else alpha_R(1) = 1._wp @@ -1977,7 +1971,7 @@ contains $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims - vel_R(i) = vel_R(i) + coeff_R(q + offxR)*q_cons_vf(momxb + i - 1)%sf(j, k + q, l) + vel_R(i) = vel_R(i) + coeff_R(q)*q_cons_vf(momxb + i - 1)%sf(j, k + q, l) end do end do @@ -2113,14 +2107,14 @@ contains $:GPU_LOOP(parallelism='[seq]') do q = vidxb + 1, vidxe - E_L = E_L + coeff_L(q + offxL)*q_cons_vf(E_idx)%sf(j, k + q, l) - F_L = F_L + coeff_L(q + offxL)*jac(j, k + q, l) + E_L = E_L + coeff_L(q)*q_cons_vf(E_idx)%sf(j, k + q, l) + F_L = F_L + coeff_L(q)*jac(j, k + q, l) end do $:GPU_LOOP(parallelism='[seq]') do q = vidxb, vidxe - 1 - E_R = E_R + coeff_R(q + offxR)*q_cons_vf(E_idx)%sf(j, k + q, l) - F_R = F_R + coeff_R(q + offxR)*jac(j, k + q, l) + E_R = E_R + coeff_R(q)*q_cons_vf(E_idx)%sf(j, k + q, l) + F_R = F_R + coeff_R(q)*jac(j, k + q, l) end do call s_get_derived_states(E_L, gamma_L, pi_inf_L, rho_L, vel_L, & @@ -2341,12 +2335,12 @@ contains q_cons_vf(momxb + 2)%sf(j - 1, k, l + q)/rho_sf_small(-1)) if (q > vidxb) then - vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q + offxL)*(dvel_small(3)) - vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q + offxL)*(-2._wp*dvel_small(1))/3._wp + vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q)*(dvel_small(3)) + vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(-2._wp*dvel_small(1))/3._wp end if if (q < vidxe) then - vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q + offxR)*(dvel_small(3)) - vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q + offxR)*(-2._wp*dvel_small(1))/3._wp + vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q)*(dvel_small(3)) + vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(-2._wp*dvel_small(1))/3._wp end if !y-direction contributions @@ -2368,12 +2362,12 @@ contains q_cons_vf(momxb + 2)%sf(j, k - 1, l + q)/rho_sf_small(-1)) if (q > vidxb) then - vflux_L_arr(2) = vflux_L_arr(2) + coeff_L(q + offxL)*(dvel_small(3)) - vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q + offxL)*(-2._wp*dvel_small(2))/3._wp + vflux_L_arr(2) = vflux_L_arr(2) + coeff_L(q)*(dvel_small(3)) + vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(-2._wp*dvel_small(2))/3._wp end if if (q < vidxe) then - vflux_R_arr(2) = vflux_R_arr(2) + coeff_R(q + offxR)*(dvel_small(3)) - vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q + offxR)*(-2._wp*dvel_small(2))/3._wp + vflux_R_arr(2) = vflux_R_arr(2) + coeff_R(q)*(dvel_small(3)) + vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(-2._wp*dvel_small(2))/3._wp end if !z-direction contributions @@ -2396,14 +2390,14 @@ contains q_cons_vf(momxb + 2)%sf(j, k, l + 1 + q)/rho_sf_small(1) - & q_cons_vf(momxb + 2)%sf(j, k, l - 1 + q)/rho_sf_small(-1)) if (q > vidxb) then - vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q + offxL)*(dvel_small(1)) - vflux_L_arr(2) = vflux_L_arr(2) + coeff_L(q + offxL)*(dvel_small(2)) - vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q + offxL)*(4._wp*dvel_small(3))/3._wp + vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q)*(dvel_small(1)) + vflux_L_arr(2) = vflux_L_arr(2) + coeff_L(q)*(dvel_small(2)) + vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(4._wp*dvel_small(3))/3._wp end if if (q < vidxe) then - vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q + offxR)*(dvel_small(1)) - vflux_R_arr(2) = vflux_R_arr(2) + coeff_R(q + offxR)*(dvel_small(2)) - vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q + offxR)*(4._wp*dvel_small(3))/3._wp + vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q)*(dvel_small(1)) + vflux_R_arr(2) = vflux_R_arr(2) + coeff_R(q)*(dvel_small(2)) + vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(4._wp*dvel_small(3))/3._wp end if end do end if @@ -2426,13 +2420,13 @@ contains do q = vidxb + 1, vidxe $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - alpha_rho_L(i) = alpha_rho_L(i) + coeff_L(q + offxL)*q_cons_vf(i)%sf(j, k, l + q) + alpha_rho_L(i) = alpha_rho_L(i) + coeff_L(q)*q_cons_vf(i)%sf(j, k, l + q) end do if (num_fluids > 1) then $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - 1 - alpha_L(i) = alpha_L(i) + coeff_L(q + offxL)*q_cons_vf(E_idx + i)%sf(j, k, l + q) + alpha_L(i) = alpha_L(i) + coeff_L(q)*q_cons_vf(E_idx + i)%sf(j, k, l + q) end do else alpha_L(1) = 1._wp @@ -2440,7 +2434,7 @@ contains $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims - vel_L(i) = vel_L(i) + coeff_L(q + offxL)*q_cons_vf(momxb + i - 1)%sf(j, k, l + q) + vel_L(i) = vel_L(i) + coeff_L(q)*q_cons_vf(momxb + i - 1)%sf(j, k, l + q) end do end do @@ -2448,13 +2442,13 @@ contains do q = vidxb, vidxe - 1 $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - alpha_rho_R(i) = alpha_rho_R(i) + coeff_R(q + offxR)*q_cons_vf(i)%sf(j, k, l + q) + alpha_rho_R(i) = alpha_rho_R(i) + coeff_R(q)*q_cons_vf(i)%sf(j, k, l + q) end do if (num_fluids > 1) then $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - 1 - alpha_R(i) = alpha_R(i) + coeff_R(q + offxR)*q_cons_vf(E_idx + i)%sf(j, k, l + q) + alpha_R(i) = alpha_R(i) + coeff_R(q)*q_cons_vf(E_idx + i)%sf(j, k, l + q) end do else alpha_R(1) = 1._wp @@ -2462,7 +2456,7 @@ contains $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims - vel_R(i) = vel_R(i) + coeff_R(q + offxR)*q_cons_vf(momxb + i - 1)%sf(j, k, l + q) + vel_R(i) = vel_R(i) + coeff_R(q)*q_cons_vf(momxb + i - 1)%sf(j, k, l + q) end do end do @@ -2598,14 +2592,14 @@ contains $:GPU_LOOP(parallelism='[seq]') do q = vidxb + 1, vidxe - E_L = E_L + coeff_L(q + offxL)*q_cons_vf(E_idx)%sf(j, k, l + q) - F_L = F_L + coeff_L(q + offxL)*jac(j, k, l + q) + E_L = E_L + coeff_L(q)*q_cons_vf(E_idx)%sf(j, k, l + q) + F_L = F_L + coeff_L(q)*jac(j, k, l + q) end do $:GPU_LOOP(parallelism='[seq]') do q = vidxb, vidxe - 1 - E_R = E_R + coeff_R(q + offxR)*q_cons_vf(E_idx)%sf(j, k, l + q) - F_R = F_R + coeff_R(q + offxR)*jac(j, k, l + q) + E_R = E_R + coeff_R(q)*q_cons_vf(E_idx)%sf(j, k, l + q) + F_R = F_R + coeff_R(q)*jac(j, k, l + q) end do call s_get_derived_states(E_L, gamma_L, pi_inf_L, rho_L, vel_L, & diff --git a/src/simulation/m_pressure_relaxation.fpp b/src/simulation/m_pressure_relaxation.fpp index c78fbe90ae..d87eab8898 100644 --- a/src/simulation/m_pressure_relaxation.fpp +++ b/src/simulation/m_pressure_relaxation.fpp @@ -146,7 +146,11 @@ contains integer, intent(in) :: j, k, l real(wp) :: pres_relax, f_pres, df_pres +#:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(3) :: pres_K_init, rho_K_s +#:else real(wp), dimension(num_fluids) :: pres_K_init, rho_K_s +#:endif integer, parameter :: MAX_ITER = 50 real(wp), parameter :: TOLERANCE = 1.e-10_wp integer :: iter, i @@ -213,8 +217,11 @@ contains type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf integer, intent(in) :: j, k, l - +#:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(2) :: alpha_rho, alpha +#:else real(wp), dimension(num_fluids) :: alpha_rho, alpha +#:endif real(wp) :: rho, dyn_pres, gamma, pi_inf, pres_relax, sum_alpha real(wp), dimension(2) :: Re integer :: i, q diff --git a/src/simulation/m_qbmm.fpp b/src/simulation/m_qbmm.fpp index f1f5453720..2f2dd46ed5 100644 --- a/src/simulation/m_qbmm.fpp +++ b/src/simulation/m_qbmm.fpp @@ -571,11 +571,15 @@ contains & cray_inline=True) real(wp), intent(in) :: pres, rho, c +#:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(32, 0:2, 0:2), intent(out) :: coeffs +#:else real(wp), dimension(nterms, 0:2, 0:2), intent(out) :: coeffs +#:endif integer :: i1, i2 - coeffs = 0._wp + coeffs(:, :, :) = 0._wp do i2 = 0, 2; do i1 = 0, 2 if ((i1 + i2) <= 2) then @@ -646,17 +650,21 @@ contains & cray_inline=True) real(wp), intent(in) :: pres, rho, c +#:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(32, 0:2, 0:2), intent(out) :: coeffs +#:else real(wp), dimension(nterms, 0:2, 0:2), intent(out) :: coeffs +#:endif integer :: i1, i2 - coeffs = 0._wp + coeffs(:, :, :) = 0._wp do i2 = 0, 2; do i1 = 0, 2 if ((i1 + i2) <= 2) then if (bubble_model == 3) then ! RPE - #:if not MFC_CASE_OPTIMIZATION or nterms > 7 + #:if not MFC_CASE_OPTIMIZATION or nterms > 1 coeffs(1, i1, i2) = -1._wp*i2*pres/rho coeffs(2, i1, i2) = -3._wp*i2/2._wp coeffs(3, i1, i2) = i2/rho @@ -715,10 +723,15 @@ contains real(stp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: mv real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: rhs_mv type(int_bounds_info), intent(in) :: ix, iy, iz - +#:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(6) :: moms, msum + real(wp), dimension(4, 3) :: wght, abscX, abscY, wght_pb, wght_mv, wght_ht, ht + real(wp), dimension(32, 0:2, 0:2) :: coeff +#:else real(wp), dimension(nmom) :: moms, msum real(wp), dimension(nnode, nb) :: wght, abscX, abscY, wght_pb, wght_mv, wght_ht, ht real(wp), dimension(nterms, 0:2, 0:2) :: coeff +#:endif real(wp) :: pres, rho, nbub, c, alf, momsum, drdt, drdt2, chi_vw, x_vw, rho_mw, k_mw, grad_T real(wp) :: n_tait, B_tait integer :: id1, id2, id3, i1, i2, j, q, r @@ -871,7 +884,11 @@ contains $:GPU_ROUTINE(function_name='s_coeff_selector',parallelism='[seq]', & & cray_inline=True) real(wp), intent(in) :: pres, rho, c +#:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(32, 0:2, 0:2), intent(out) :: coeff +#:else real(wp), dimension(nterms, 0:2, 0:2), intent(out) :: coeff +#:endif logical, intent(in) :: polytropic if (polytropic) then call s_coeff(pres, rho, c, coeff) @@ -960,15 +977,24 @@ contains function f_quad(abscX, abscY, wght_in, q, r, s) $:GPU_ROUTINE(parallelism='[seq]') - real(wp), dimension(nnode, nb), intent(in) :: abscX, abscY, wght_in +#:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(4, 3), intent(in) :: abscX, abscY, wght_in +#:else + real(wp), dimension(nnode, nb), intent(in) :: abscX, abscY, wght_in +#:endif real(wp), intent(in) :: q, r, s real(wp) :: f_quad_RV, f_quad - integer :: i + integer :: i, i1 f_quad = 0._wp + $:GPU_LOOP(parallelism='[seq]') do i = 1, nb - f_quad_RV = sum(wght_in(:, i)*(abscX(:, i)**q)*(abscY(:, i)**r)) + f_quad_RV = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i1 = 1, nnode + f_quad_RV = f_quad_RV + wght_in(i1, i)*(abscX(i1, i)**q)*(abscY(i1, i)**r) + end do f_quad = f_quad + weight(i)*(R0(i)**s)*f_quad_RV end do @@ -976,12 +1002,21 @@ contains function f_quad2D(abscX, abscY, wght_in, pow) $:GPU_ROUTINE(parallelism='[seq]') +#:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(4), intent(in) :: abscX, abscY, wght_in +#:else real(wp), dimension(nnode), intent(in) :: abscX, abscY, wght_in +#:endif real(wp), dimension(3), intent(in) :: pow real(wp) :: f_quad2D + integer :: i - f_quad2D = sum(wght_in(:)*(abscX(:)**pow(1))*(abscY(:)**pow(2))) + f_quad2D = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, nnode + f_quad2D = f_quad2D + wght_in(i)*(abscX(i)**pow(1))*(abscY(i)**pow(2)) + end do end function f_quad2D end subroutine s_mom_inv diff --git a/src/simulation/m_rhs.fpp b/src/simulation/m_rhs.fpp index 1c8eda1c30..9689be4db0 100644 --- a/src/simulation/m_rhs.fpp +++ b/src/simulation/m_rhs.fpp @@ -339,7 +339,7 @@ contains ! END: Allocation/Association of flux_n, flux_src_n, and flux_gsrc_n end if - if (.not. igr) then + if ((.not. igr) .or. dummy) then ! Allocation of dq_prim_ds_qp @:ALLOCATE(dq_prim_dx_qp(1:1)) @@ -781,7 +781,7 @@ contains call nvtxEndRange end if end if - if(.not. igr .or. dummy) then! Finite volume solve + if((.not. igr) .or. dummy) then! Finite volume solve ! Reconstructing Primitive/Conservative Variables call nvtxStartRange("RHS-WENO") diff --git a/src/simulation/m_riemann_solvers.fpp b/src/simulation/m_riemann_solvers.fpp index fc5cefadaf..d8b5792fe0 100644 --- a/src/simulation/m_riemann_solvers.fpp +++ b/src/simulation/m_riemann_solvers.fpp @@ -49,6 +49,10 @@ module m_riemann_solvers get_species_specific_heats_r, get_species_enthalpies_rt, & get_mixture_specific_heat_cp_mass + #:if USING_AMD + use m_chemistry, only: molecular_weights_nonparameter + #:endif + implicit none private; public :: s_initialize_riemann_solvers_module, & @@ -287,17 +291,25 @@ contains integer, intent(in) :: norm_dir type(int_bounds_info), intent(in) :: ix, iy, iz - +#:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(3) :: alpha_rho_L, alpha_rho_R + real(wp), dimension(3) :: vel_L, vel_R + real(wp), dimension(3) :: alpha_L, alpha_R + real(wp), dimension(10) :: Ys_L, Ys_R + real(wp), dimension(10) :: Cp_iL, Cp_iR, Xs_L, Xs_R, Gamma_iL, Gamma_iR + real(wp), dimension(10) :: Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2 +#:else real(wp), dimension(num_fluids) :: alpha_rho_L, alpha_rho_R - real(wp) :: rho_L, rho_R real(wp), dimension(num_vels) :: vel_L, vel_R - real(wp) :: pres_L, pres_R - real(wp) :: E_L, E_R - real(wp) :: H_L, H_R real(wp), dimension(num_fluids) :: alpha_L, alpha_R real(wp), dimension(num_species) :: Ys_L, Ys_R real(wp), dimension(num_species) :: Cp_iL, Cp_iR, Xs_L, Xs_R, Gamma_iL, Gamma_iR real(wp), dimension(num_species) :: Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2 +#:endif + real(wp) :: rho_L, rho_R + real(wp) :: pres_L, pres_R + real(wp) :: E_L, E_R + real(wp) :: H_L, H_R real(wp) :: Cp_avg, Cv_avg, T_avg, eps, c_sum_Yi_Phi real(wp) :: T_L, T_R real(wp) :: Y_L, Y_R @@ -481,9 +493,13 @@ contains call get_mixture_molecular_weight(Ys_L, MW_L) call get_mixture_molecular_weight(Ys_R, MW_R) - +#:if USING_AMD + Xs_L(:) = Ys_L(:)*MW_L/molecular_weights_nonparameter(:) + Xs_R(:) = Ys_R(:)*MW_R/molecular_weights_nonparameter(:) +#:else Xs_L(:) = Ys_L(:)*MW_L/molecular_weights(:) - Xs_R(:) = Ys_R(:)*MW_R/molecular_weights(:) + Xs_R(:) = Ys_R(:)*MW_R/molecular_weights(:) +#:endif R_gas_L = gas_constant/MW_L R_gas_R = gas_constant/MW_R @@ -1052,17 +1068,28 @@ contains integer, intent(in) :: norm_dir type(int_bounds_info), intent(in) :: ix, iy, iz - +#:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(3) :: alpha_rho_L, alpha_rho_R + real(wp), dimension(3) :: vel_L, vel_R + real(wp), dimension(3) :: alpha_L, alpha_R + real(wp), dimension(10) :: Ys_L, Ys_R + real(wp), dimension(10) :: Cp_iL, Cp_iR, Xs_L, Xs_R, Gamma_iL, Gamma_iR + real(wp), dimension(10) :: Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2 + real(wp), dimension(3, 3) :: vel_grad_L, vel_grad_R !< Averaged velocity gradient tensor `d(vel_i)/d(coord_j)`. +#:else real(wp), dimension(num_fluids) :: alpha_rho_L, alpha_rho_R - real(wp) :: rho_L, rho_R real(wp), dimension(num_vels) :: vel_L, vel_R - real(wp) :: pres_L, pres_R - real(wp) :: E_L, E_R - real(wp) :: H_L, H_R real(wp), dimension(num_fluids) :: alpha_L, alpha_R real(wp), dimension(num_species) :: Ys_L, Ys_R real(wp), dimension(num_species) :: Cp_iL, Cp_iR, Xs_L, Xs_R, Gamma_iL, Gamma_iR real(wp), dimension(num_species) :: Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2 + real(wp), dimension(num_dims, num_dims) :: vel_grad_L, vel_grad_R !< Averaged velocity gradient tensor `d(vel_i)/d(coord_j)`. +#:endif + real(wp) :: rho_L, rho_R + + real(wp) :: pres_L, pres_R + real(wp) :: E_L, E_R + real(wp) :: H_L, H_R real(wp) :: Cp_avg, Cv_avg, T_avg, eps, c_sum_Yi_Phi real(wp) :: T_L, T_R real(wp) :: Y_L, Y_R @@ -1104,8 +1131,6 @@ contains type(riemann_states_vec3) :: cm ! Conservative momentum variables integer :: i, j, k, l, q !< Generic loop iterators - - real(wp), dimension(num_dims, num_dims) :: vel_grad_L, vel_grad_R !< Averaged velocity gradient tensor `d(vel_i)/d(coord_j)`. integer, dimension(3) :: idx_right_phys !< Physical (j,k,l) indices for right state. ! Populating the buffers of the left and right Riemann problem @@ -1249,8 +1274,13 @@ contains call get_mixture_molecular_weight(Ys_L, MW_L) call get_mixture_molecular_weight(Ys_R, MW_R) +#:if USING_AMD + Xs_L(:) = Ys_L(:)*MW_L/molecular_weights_nonparameter(:) + Xs_R(:) = Ys_R(:)*MW_R/molecular_weights_nonparameter(:) +#:else Xs_L(:) = Ys_L(:)*MW_L/molecular_weights(:) - Xs_R(:) = Ys_R(:)*MW_R/molecular_weights(:) + Xs_R(:) = Ys_R(:)*MW_R/molecular_weights(:) +#:endif R_gas_L = gas_constant/MW_L R_gas_R = gas_constant/MW_R @@ -1943,8 +1973,8 @@ contains type(int_bounds_info), intent(in) :: ix, iy, iz #:if not MFC_CASE_OPTIMIZATION and USING_AMD - real(wp), dimension(2) :: alpha_rho_L, alpha_rho_R - real(wp), dimension(2) :: alpha_L, alpha_R + real(wp), dimension(3) :: alpha_rho_L, alpha_rho_R + real(wp), dimension(3) :: alpha_L, alpha_R real(wp), dimension(3) :: vel_L, vel_R #:else real(wp), dimension(num_fluids) :: alpha_rho_L, alpha_rho_R @@ -1956,8 +1986,13 @@ contains real(wp) :: pres_L, pres_R real(wp) :: E_L, E_R real(wp) :: H_L, H_R +#:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(10) :: Ys_L, Ys_R, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Cp_iL, Cp_iR + real(wp), dimension(10) :: Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2 +#:else real(wp), dimension(num_species) :: Ys_L, Ys_R, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Cp_iL, Cp_iR real(wp), dimension(num_species) :: Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2 +#:endif real(wp) :: Cp_avg, Cv_avg, T_avg, c_sum_Yi_Phi, eps real(wp) :: T_L, T_R real(wp) :: MW_L, MW_R @@ -3227,8 +3262,13 @@ contains call get_mixture_molecular_weight(Ys_L, MW_L) call get_mixture_molecular_weight(Ys_R, MW_R) +#:if USING_AMD + Xs_L(:) = Ys_L(:)*MW_L/molecular_weights_nonparameter(:) + Xs_R(:) = Ys_R(:)*MW_R/molecular_weights_nonparameter(:) +#:else Xs_L(:) = Ys_L(:)*MW_L/molecular_weights(:) - Xs_R(:) = Ys_R(:)*MW_R/molecular_weights(:) + Xs_R(:) = Ys_R(:)*MW_R/molecular_weights(:) +#:endif R_gas_L = gas_constant/MW_L R_gas_R = gas_constant/MW_R @@ -3680,7 +3720,11 @@ contains type(int_bounds_info), intent(in) :: ix, iy, iz ! Local variables: +#:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(3) :: alpha_L, alpha_R, alpha_rho_L, alpha_rho_R +#:else real(wp), dimension(num_fluids) :: alpha_L, alpha_R, alpha_rho_L, alpha_rho_R +#:endif type(riemann_states_vec3) :: vel type(riemann_states) :: rho, pres, E, H_no_mag type(riemann_states) :: gamma, pi_inf, qv @@ -4638,16 +4682,24 @@ contains type(int_bounds_info), intent(in) :: ix, iy, iz ! Local variables +#:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(3) :: avg_v_int !!< Averaged interface velocity $(v_x, v_y, v_z)$ (grid directions). + real(wp), dimension(3) :: avg_dvdx_int !!< Averaged interface $\partial v_i/\partial x$ (grid dir 1). + real(wp), dimension(3) :: avg_dvdy_int !!< Averaged interface $\partial v_i/\partial y$ (grid dir 2). + real(wp), dimension(3) :: avg_dvdz_int !!< Averaged interface $\partial v_i/\partial z$ (grid dir 3). + real(wp), dimension(3) :: vel_src_int !!< Interface velocity $(v_1,v_2,v_3)$ (grid directions) for viscous work. + real(wp), dimension(3) :: stress_vector_shear !!< Shear stress vector $(\sigma_{N1}, \sigma_{N2}, \sigma_{N3})$ on N-face (grid directions). +#:else real(wp), dimension(num_dims) :: avg_v_int !!< Averaged interface velocity $(v_x, v_y, v_z)$ (grid directions). real(wp), dimension(num_dims) :: avg_dvdx_int !!< Averaged interface $\partial v_i/\partial x$ (grid dir 1). real(wp), dimension(num_dims) :: avg_dvdy_int !!< Averaged interface $\partial v_i/\partial y$ (grid dir 2). real(wp), dimension(num_dims) :: avg_dvdz_int !!< Averaged interface $\partial v_i/\partial z$ (grid dir 3). - + real(wp), dimension(num_dims) :: vel_src_int !!< Interface velocity $(v_1,v_2,v_3)$ (grid directions) for viscous work. real(wp), dimension(num_dims) :: stress_vector_shear !!< Shear stress vector $(\sigma_{N1}, \sigma_{N2}, \sigma_{N3})$ on N-face (grid directions). +#:endif real(wp) :: stress_normal_bulk !!< Normal bulk stress component $\sigma_{NN}$ on N-face. real(wp) :: Re_s, Re_b !!< Effective interface shear and bulk Reynolds numbers. - real(wp), dimension(num_dims) :: vel_src_int !!< Interface velocity $(v_1,v_2,v_3)$ (grid directions) for viscous work. real(wp) :: r_eff !!< Effective radius at interface for cylindrical terms. real(wp) :: div_v_term_const !!< Common term $-(2/3)(\nabla \cdot \mathbf{v}) / \text{Re}_s$ for shear stress diagonal. real(wp) :: divergence_cyl !!< Full divergence $\nabla \cdot \mathbf{v}$ in cylindrical coordinates. @@ -4813,10 +4865,17 @@ contains integer, intent(in) :: norm_dir ! Local variables +#:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(3, 3) :: vel_grad_avg !< Averaged velocity gradient tensor `d(vel_i)/d(coord_j)`. + real(wp), dimension(3, 3) :: current_tau_shear !< Current shear stress tensor. + real(wp), dimension(3, 3) :: current_tau_bulk !< Current bulk stress tensor. + real(wp), dimension(3) :: vel_src_at_interface !< Interface velocities (u,v,w) for viscous work. +#:else real(wp), dimension(num_dims, num_dims) :: vel_grad_avg !< Averaged velocity gradient tensor `d(vel_i)/d(coord_j)`. real(wp), dimension(num_dims, num_dims) :: current_tau_shear !< Current shear stress tensor. real(wp), dimension(num_dims, num_dims) :: current_tau_bulk !< Current bulk stress tensor. real(wp), dimension(num_dims) :: vel_src_at_interface !< Interface velocities (u,v,w) for viscous work. +#:endif integer, dimension(3) :: idx_right_phys !< Physical (j,k,l) indices for right state. real(wp) :: Re_shear !< Interface shear Reynolds number. @@ -4929,10 +4988,15 @@ contains $:GPU_ROUTINE(parallelism='[seq]') ! Arguments +#:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(3, 3), intent(in) :: vel_grad_avg + real(wp), dimension(3, 3), intent(out) :: tau_shear_out +#:else real(wp), dimension(num_dims, num_dims), intent(in) :: vel_grad_avg + real(wp), dimension(num_dims, num_dims), intent(out) :: tau_shear_out +#:endif real(wp), intent(in) :: Re_shear real(wp), intent(in) :: divergence_v - real(wp), dimension(num_dims, num_dims), intent(out) :: tau_shear_out ! Local variables integer :: i_dim !< Loop iterator for face normal. @@ -4963,7 +5027,11 @@ contains ! Arguments real(wp), intent(in) :: Re_bulk real(wp), intent(in) :: divergence_v +#:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(3, 3), intent(out) :: tau_bulk_out +#:else real(wp), dimension(num_dims, num_dims), intent(out) :: tau_bulk_out +#:endif ! Local variables integer :: i_dim !< Loop iterator for diagonal components. diff --git a/src/simulation/m_sim_helpers.fpp b/src/simulation/m_sim_helpers.fpp index 873a614cbb..64f9fd427d 100644 --- a/src/simulation/m_sim_helpers.fpp +++ b/src/simulation/m_sim_helpers.fpp @@ -98,7 +98,7 @@ contains type(scalar_field), intent(in), dimension(sys_size) :: q_prim_vf #:if not MFC_CASE_OPTIMIZATION and USING_AMD - real(wp), intent(inout), dimension(2) :: alpha + real(wp), intent(inout), dimension(3) :: alpha real(wp), intent(inout), dimension(3) :: vel #:else real(wp), intent(inout), dimension(num_fluids) :: alpha @@ -109,7 +109,7 @@ contains integer, intent(in) :: j, k, l real(wp), dimension(2), intent(inout) :: Re #:if not MFC_CASE_OPTIMIZATION and USING_AMD - real(wp), dimension(2) :: alpha_rho, Gs + real(wp), dimension(3) :: alpha_rho, Gs #:else real(wp), dimension(num_fluids) :: alpha_rho, Gs #:endif diff --git a/src/simulation/m_surface_tension.fpp b/src/simulation/m_surface_tension.fpp index d51b67f659..3afa1bd4b2 100644 --- a/src/simulation/m_surface_tension.fpp +++ b/src/simulation/m_surface_tension.fpp @@ -82,8 +82,11 @@ contains intent(inout) :: flux_src_vf integer, intent(in) :: id type(int_bounds_info), intent(in) :: isx, isy, isz - - real(wp), dimension(num_dims, num_dims) :: Omega +#:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(3, 3) :: Omega +#:else + real(wp), dimension(num_dims, num_dims) :: Omega +#:endif real(wp) :: w1L, w1R, w2L, w2R, w3L, w3R, w1, w2, w3 real(wp) :: normWL, normWR, normW integer :: j, k, l, i diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index cd4be44028..c34792f554 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -463,14 +463,18 @@ contains @:ALLOCATE(bc_type(1,1)%sf(0:0,0:n,0:p)) @:ALLOCATE(bc_type(1,2)%sf(0:0,0:n,0:p)) + #:if not MFC_CASE_OPTIMIZATION or num_dims > 1 if (n > 0) then @:ALLOCATE(bc_type(2,1)%sf(-buff_size:m+buff_size,0:0,0:p)) @:ALLOCATE(bc_type(2,2)%sf(-buff_size:m+buff_size,0:0,0:p)) + #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 if (p > 0) then @:ALLOCATE(bc_type(3,1)%sf(-buff_size:m+buff_size,-buff_size:n+buff_size,0:0)) @:ALLOCATE(bc_type(3,2)%sf(-buff_size:m+buff_size,-buff_size:n+buff_size,0:0)) end if + #:endif end if + #:endif do i = 1, num_dims do j = 1, 2 @@ -718,10 +722,15 @@ contains impure subroutine s_compute_dt() real(wp) :: rho !< Cell-avg. density +#:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(3) :: vel !< Cell-avg. velocity + real(wp), dimension(3) :: alpha !< Cell-avg. volume fraction +#:else real(wp), dimension(num_vels) :: vel !< Cell-avg. velocity - real(wp) :: vel_sum !< Cell-avg. velocity sum - real(wp) :: pres !< Cell-avg. pressure real(wp), dimension(num_fluids) :: alpha !< Cell-avg. volume fraction +#:endif + real(wp) :: vel_sum !< Cell-avg. velocity sum + real(wp) :: pres !< Cell-avg. pressure real(wp) :: gamma !< Cell-avg. sp. heat ratio real(wp) :: pi_inf !< Cell-avg. liquid stiffness function real(wp) :: qv !< Cell-avg. fluid reference energy diff --git a/src/simulation/m_viscous.fpp b/src/simulation/m_viscous.fpp index f3fc59ee43..3cd6099539 100644 --- a/src/simulation/m_viscous.fpp +++ b/src/simulation/m_viscous.fpp @@ -72,9 +72,13 @@ contains real(wp) :: rho_visc, gamma_visc, pi_inf_visc, alpha_visc_sum !< Mixture variables real(wp), dimension(2) :: Re_visc +#:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(3) :: alpha_visc, alpha_rho_visc + real(wp), dimension(3, 3) :: tau_Re +#:else real(wp), dimension(num_fluids) :: alpha_visc, alpha_rho_visc - real(wp), dimension(num_dims, num_dims) :: tau_Re +#:endif integer :: i, j, k, l, q !< Generic loop iterator @@ -97,7 +101,7 @@ contains #:if not MFC_CASE_OPTIMIZATION or num_dims > 1 if (shear_stress) then ! Shear stresses - $:GPU_PARALLEL_LOOP(collapse=3, private='[alpha_visc, alpha_rho_visc, Re_visc, tau_Re]') + $:GPU_PARALLEL_LOOP(collapse=3, private='[i,j,k,l,rho_visc, gamma_visc, pi_inf_visc, alpha_visc_sum ,alpha_visc, alpha_rho_visc, Re_visc, tau_Re]') do l = is3_viscous%beg, is3_viscous%end do k = -1, 1 do j = is1_viscous%beg, is1_viscous%end @@ -207,7 +211,7 @@ contains #:if not MFC_CASE_OPTIMIZATION or num_dims > 1 if (bulk_stress) then ! Bulk stresses - $:GPU_PARALLEL_LOOP(collapse=3, private='[alpha_visc, alpha_rho_visc, Re_visc, tau_Re]') + $:GPU_PARALLEL_LOOP(collapse=3, private='[i,j,k,l,rho_visc, gamma_visc, pi_inf_visc, alpha_visc_sum ,alpha_visc, alpha_rho_visc, Re_visc, tau_Re]') do l = is3_viscous%beg, is3_viscous%end do k = -1, 1 do j = is1_viscous%beg, is1_viscous%end @@ -1158,56 +1162,54 @@ contains recon_dir, & is1_viscous, is2_viscous, is3_viscous) end if + end if + #:endfor - if (viscous .or. dummy) then - #:if not MFC_CASE_OPTIMIZATION or viscous - if (weno_Re_flux) then - if (norm_dir == 2) then - $:GPU_PARALLEL_LOOP(collapse=4) - do i = iv%beg, iv%end - do l = is3_viscous%beg, is3_viscous%end - do j = is1_viscous%beg, is1_viscous%end - do k = is2_viscous%beg, is2_viscous%end - vL_prim_vf(i)%sf(k, j, l) = vL_y(j, k, l, i) - vR_prim_vf(i)%sf(k, j, l) = vR_y(j, k, l, i) - end do - end do + if (viscous .or. dummy) then + if (weno_Re_flux) then + if (norm_dir == 2) then + $:GPU_PARALLEL_LOOP(collapse=4) + do i = iv%beg, iv%end + do l = is3_viscous%beg, is3_viscous%end + do j = is1_viscous%beg, is1_viscous%end + do k = is2_viscous%beg, is2_viscous%end + vL_prim_vf(i)%sf(k, j, l) = vL_y(j, k, l, i) + vR_prim_vf(i)%sf(k, j, l) = vR_y(j, k, l, i) end do end do - $:END_GPU_PARALLEL_LOOP() - elseif (norm_dir == 3) then - $:GPU_PARALLEL_LOOP(collapse=4) - do i = iv%beg, iv%end - do j = is1_viscous%beg, is1_viscous%end - do k = is2_viscous%beg, is2_viscous%end - do l = is3_viscous%beg, is3_viscous%end - vL_prim_vf(i)%sf(l, k, j) = vL_z(j, k, l, i) - vR_prim_vf(i)%sf(l, k, j) = vR_z(j, k, l, i) - end do - end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + elseif (norm_dir == 3) then + $:GPU_PARALLEL_LOOP(collapse=4) + do i = iv%beg, iv%end + do j = is1_viscous%beg, is1_viscous%end + do k = is2_viscous%beg, is2_viscous%end + do l = is3_viscous%beg, is3_viscous%end + vL_prim_vf(i)%sf(l, k, j) = vL_z(j, k, l, i) + vR_prim_vf(i)%sf(l, k, j) = vR_z(j, k, l, i) end do end do - $:END_GPU_PARALLEL_LOOP() - elseif (norm_dir == 1) then - $:GPU_PARALLEL_LOOP(collapse=4) - do i = iv%beg, iv%end - do l = is3_viscous%beg, is3_viscous%end - do k = is2_viscous%beg, is2_viscous%end - do j = is1_viscous%beg, is1_viscous%end - vL_prim_vf(i)%sf(j, k, l) = vL_x(j, k, l, i) - vR_prim_vf(i)%sf(j, k, l) = vR_x(j, k, l, i) - end do - end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + elseif (norm_dir == 1) then + $:GPU_PARALLEL_LOOP(collapse=4) + do i = iv%beg, iv%end + do l = is3_viscous%beg, is3_viscous%end + do k = is2_viscous%beg, is2_viscous%end + do j = is1_viscous%beg, is1_viscous%end + vL_prim_vf(i)%sf(j, k, l) = vL_x(j, k, l, i) + vR_prim_vf(i)%sf(j, k, l) = vR_x(j, k, l, i) end do end do - $:END_GPU_PARALLEL_LOOP() - end if - end if - #:endif + end do + end do + $:END_GPU_PARALLEL_LOOP() end if end if + end if - #:endfor end subroutine s_reconstruct_cell_boundary_values_visc_deriv !> The purpose of this subroutine is to employ the inputted diff --git a/src/simulation/m_weno.fpp b/src/simulation/m_weno.fpp index 6e8b668326..ff6a0e5f93 100644 --- a/src/simulation/m_weno.fpp +++ b/src/simulation/m_weno.fpp @@ -665,7 +665,7 @@ contains real(wp), dimension(-3:3) :: v ! temporary field value array for clarity (WENO7 only) real(wp) :: tau - integer :: i, j, k, l + integer :: i, j, k, l, q is1_weno = is1_weno_d is2_weno = is2_weno_d @@ -808,7 +808,7 @@ contains #:if not MFC_CASE_OPTIMIZATION or weno_num_stencils > 1 #:for WENO_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] if (weno_dir == ${WENO_DIR}$) then - $:GPU_PARALLEL_LOOP(collapse=3,private='[dvd,poly,beta,alpha,omega,tau,delta]') + $:GPU_PARALLEL_LOOP(collapse=3,private='[dvd,poly,beta,alpha,omega,tau,delta,q]') do l = is3_weno%beg, is3_weno%end do k = is2_weno%beg, is2_weno%end do j = is1_weno%beg, is1_weno%end @@ -816,6 +816,11 @@ contains do i = 1, v_size ! reconstruct from left side + alpha(:) = 0._wp + omega(:) = 0._wp + delta(:) = 0._wp + beta(:) = weno_eps + dvd(1) = v_rs_ws_${XYZ}$ (j + 2, k, l, i) & - v_rs_ws_${XYZ}$ (j + 1, k, l, i) dvd(0) = v_rs_ws_${XYZ}$ (j + 1, k, l, i) & @@ -862,23 +867,36 @@ contains ! Borges, et al. (2008) tau = abs(beta(2) - beta(0)) ! Equation 25 - alpha(0:weno_num_stencils) = d_cbL_${XYZ}$ (0:weno_num_stencils, j)*(1._wp + tau/beta(0:weno_num_stencils)) ! Equation 28 (note: weno_eps was already added to beta) + $:GPU_LOOP(parallelism='[seq]') + do q = 0, weno_num_stencils + alpha(q) = d_cbL_${XYZ}$ (q, j)*(1._wp + (tau/beta(q))) ! Equation 28 (note: weno_eps was already added to beta) + end do elseif (teno) then ! Fu, et al. (2016) ! Fu''s code: https://dx.doi.org/10.13140/RG.2.2.36250.34247 tau = abs(beta(2) - beta(0)) - alpha = 1._wp + tau/beta ! Equation 22 (reuse alpha as gamma; pick C=1 & q=6) - alpha = (alpha*alpha*alpha)**2._wp ! Equation 22 cont. (some CPU compilers cannot optimize x**6.0) + $:GPU_LOOP(parallelism='[seq]') + do q = 0, weno_num_stencils + alpha(q) = 1._wp + tau/beta(q) ! Equation 22 (reuse alpha as gamma; pick C=1 & q=6) + alpha(q) = (alpha(q)**3._wp)**2._wp ! Equation 22 cont. (some CPU compilers cannot optimize x**6.0) + end do omega = alpha/sum(alpha) ! Equation 25 (reuse omega as xi) - delta = merge(0._wp, 1._wp, omega < teno_CT)! Equation 26 - alpha(0:weno_num_stencils) = delta(0:weno_num_stencils)*d_cbL_${XYZ}$ (0:weno_num_stencils, j) ! Equation 27 - + + $:GPU_LOOP(parallelism='[seq]') + do q = 0, weno_num_stencils + if(omega(q) < teno_CT) then ! Equation 26 + delta(q) = 0._wp + else + delta(q) = 1._wp + end if + alpha(q) = delta(q)*d_cbL_${XYZ}$ (q, j) ! Equation 27 + end do end if omega = alpha/sum(alpha) - vL_rs_vf_${XYZ}$ (j, k, l, i) = sum(omega*poly) + vL_rs_vf_${XYZ}$ (j, k, l, i) = omega(0)*poly(0) + omega(1)*poly(1) + omega(2)*poly(2) ! reconstruct from right side @@ -904,16 +922,21 @@ contains elseif (wenoz) then - alpha(0:weno_num_stencils) = d_cbR_${XYZ}$ (0:weno_num_stencils, j)*(1._wp + tau/beta(0:weno_num_stencils)) + $:GPU_LOOP(parallelism='[seq]') + do q = 0, weno_num_stencils + alpha(q) = d_cbR_${XYZ}$ (q, j)*(1._wp + (tau/beta(q))) + end do elseif (teno) then - alpha(0:weno_num_stencils) = delta(0:weno_num_stencils)*d_cbR_${XYZ}$ (0:weno_num_stencils, j) - + $:GPU_LOOP(parallelism='[seq]') + do q = 0, weno_num_stencils + alpha(q) = delta(q)*d_cbR_${XYZ}$ (q, j) + end do end if omega = alpha/sum(alpha) - vR_rs_vf_${XYZ}$ (j, k, l, i) = sum(omega*poly) + vR_rs_vf_${XYZ}$ (j, k, l, i) = omega(0)*poly(0) + omega(1)*poly(1) + omega(2)*poly(2) end do end do @@ -933,13 +956,18 @@ contains #:if not MFC_CASE_OPTIMIZATION or weno_num_stencils > 2 #:for WENO_DIR, XYZ in [(1, 'x'), (2, 'y'), (3, 'z')] if (weno_dir == ${WENO_DIR}$) then - $:GPU_PARALLEL_LOOP(collapse=3,private='[poly,beta,alpha,omega,tau,delta,dvd,v]') + $:GPU_PARALLEL_LOOP(collapse=3,private='[poly,beta,alpha,omega,tau,delta,dvd,v,q]') do l = is3_weno%beg, is3_weno%end do k = is2_weno%beg, is2_weno%end do j = is1_weno%beg, is1_weno%end $:GPU_LOOP(parallelism='[seq]') do i = 1, v_size + alpha(:) = 0._wp + omega(:) = 0._wp + delta(:) = 0._wp + beta(:) = weno_eps + if (teno) v = v_rs_ws_${XYZ}$ (j - 3:j + 3, k, l, i) ! temporary field value array for clarity if (.not. teno) then @@ -1056,7 +1084,10 @@ contains ! Castro, et al. (2010) ! Don & Borges (2013) also helps tau = abs(beta(3) - beta(0)) ! Equation 50 - alpha(0:weno_num_stencils) = d_cbL_${XYZ}$ (0:weno_num_stencils, j)*(1._wp + (tau/beta(0:weno_num_stencils))**wenoz_q) ! q = 2,3,4 for stability + $:GPU_LOOP(parallelism='[seq]') + do q = 0, weno_num_stencils + alpha(q) = d_cbL_${XYZ}$ (q, j)*(1._wp + (tau/beta(q))**wenoz_q) ! wenoz_q = 2,3,4 for stability + end do elseif (teno) then #:if not MFC_CASE_OPTIMIZATION or weno_num_stencils > 3 @@ -1064,14 +1095,28 @@ contains alpha = 1._wp + tau/beta alpha = (alpha**3._wp)**2._wp ! some CPU compilers cannot optimize x**6.0 omega = alpha/sum(alpha) - delta = merge(0._wp, 1._wp, omega < teno_CT) - alpha(0:weno_num_stencils) = delta(0:weno_num_stencils)*d_cbL_${XYZ}$ (0:weno_num_stencils, j) + + $:GPU_LOOP(parallelism='[seq]') + do q = 0, weno_num_stencils + if(omega(q) < teno_CT) then ! Equation 26 + delta(q) = 0._wp + else + delta(q) = 1._wp + end if + alpha(q) = delta(q)*d_cbL_${XYZ}$ (q, j) ! Equation 27 + end do #:endif end if omega = alpha/sum(alpha) - vL_rs_vf_${XYZ}$ (j, k, l, i) = sum(omega*poly) + vL_rs_vf_${XYZ}$ (j, k, l, i) = omega(0)*poly(0) + omega(1)*poly(1) + omega(2)*poly(2) + omega(3)*poly(3) + + if(teno) then + #:if not MFC_CASE_OPTIMIZATION or weno_num_stencils > 3 + vL_rs_vf_${XYZ}$ (j, k, l, i) = vL_rs_vf_${XYZ}$ (j, k, l, i) + omega(4)*poly(4) + #:endif + end if if (.not. teno) then poly(3) = v_rs_ws_${XYZ}$ (j, k, l, i) & @@ -1111,16 +1156,27 @@ contains elseif (wenoz) then - alpha(0:weno_num_stencils) = d_cbR_${XYZ}$ (0:weno_num_stencils, j)*(1._wp + tau/beta(0:weno_num_stencils)) + $:GPU_LOOP(parallelism='[seq]') + do q = 0, weno_num_stencils + alpha(q) = d_cbR_${XYZ}$ (q, j)*(1._wp + (tau/beta(q))**wenoz_q) ! wenoz_q = 2,3,4 for stability + end do elseif (teno) then - alpha(0:weno_num_stencils) = delta(0:weno_num_stencils)*d_cbR_${XYZ}$ (0:weno_num_stencils, j) - + $:GPU_LOOP(parallelism='[seq]') + do q = 0, weno_num_stencils + alpha(q) = delta(q)*d_cbR_${XYZ}$ (q, j) + end do end if omega = alpha/sum(alpha) - vR_rs_vf_${XYZ}$ (j, k, l, i) = sum(omega*poly) + vR_rs_vf_${XYZ}$ (j, k, l, i) = omega(0)*poly(0) + omega(1)*poly(1) + omega(2)*poly(2) + omega(3)*poly(3) + + if(teno) then + #:if not MFC_CASE_OPTIMIZATION or weno_num_stencils > 3 + vR_rs_vf_${XYZ}$ (j, k, l, i) = vR_rs_vf_${XYZ}$ (j, k, l, i) + omega(4)*poly(4) + #:endif + end if end do end do diff --git a/toolchain/mfc/test/case.py b/toolchain/mfc/test/case.py index 5a439aaead..c5ffdd301a 100644 --- a/toolchain/mfc/test/case.py +++ b/toolchain/mfc/test/case.py @@ -264,6 +264,8 @@ def compute_tolerance(self) -> float: tolerance = 1e-9 elif self.params.get("mhd", 'F') == 'T': tolerance = 1e-8 + elif "Axisymmetric" in self.trace.split(" -> "): + tolerance = 1e-11 return 1e8 * tolerance if single else tolerance From 1386ed981e315ce2150fcfa3575d5a930b59b6f3 Mon Sep 17 00:00:00 2001 From: Anand Date: Sun, 25 Jan 2026 09:51:20 -0500 Subject: [PATCH 03/21] cleanup --- src/common/m_phase_change.fpp | 6 +- src/simulation/m_bubbles_EL.fpp | 2 +- src/simulation/m_cbc.fpp | 9 ++- src/simulation/m_compute_cbc.fpp | 103 ++++++++++++++++++++---------- src/simulation/m_hyperelastic.fpp | 8 ++- src/simulation/m_qbmm.fpp | 13 ++-- src/simulation/m_weno.fpp | 2 +- 7 files changed, 94 insertions(+), 49 deletions(-) diff --git a/src/common/m_phase_change.fpp b/src/common/m_phase_change.fpp index 7a88a7b323..0ce4e24392 100644 --- a/src/common/m_phase_change.fpp +++ b/src/common/m_phase_change.fpp @@ -308,14 +308,16 @@ contains real(wp), intent(in) :: rhoe real(wp), intent(out) :: TS real(wp) :: gp, gpp, hp, pO, mCP, mQ !< variables for the Newton Solver + real(wp) :: p_infpT_sum integer :: i, ns !< generic loop iterators ! auxiliary variables for the pT-equilibrium solver - mCP = 0.0_wp; mQ = 0.0_wp; + mCP = 0.0_wp; mQ = 0.0_wp; p_infpT_sum = 0._wp $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids p_infpT(i) = ps_inf(i) + p_infpT_sum = p_infpT_sum + abs(p_infpT(i)) end do ! Performing tests before initializing the pT-equilibrium $:GPU_LOOP(parallelism='[seq]') @@ -332,7 +334,7 @@ contains if(num_fluids < 3) then $:GPU_LOOP(parallelism='[seq]') do i = num_fluids+1, 3 - p_infpT(i) = 10**32_wp + p_infpT(i) = p_infpT_sum end do end if diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index 2f9937f140..28e9c8c27b 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -540,7 +540,7 @@ contains #:if not MFC_CASE_OPTIMIZATION and USING_AMD real(wp), dimension(3) :: myalpha_rho, myalpha #:else - real(wp), dimension(contxe) :: myalpha_rho, myalpha + real(wp), dimension(num_fluids) :: myalpha_rho, myalpha #:endif real(wp), dimension(2) :: Re integer, dimension(3) :: cell diff --git a/src/simulation/m_cbc.fpp b/src/simulation/m_cbc.fpp index 0f3f0011b7..bd0abfbd20 100644 --- a/src/simulation/m_cbc.fpp +++ b/src/simulation/m_cbc.fpp @@ -650,20 +650,23 @@ contains real(wp) :: dpi_inf_dt real(wp) :: dqv_dt real(wp) :: dpres_ds +#:if USING_AMD + real(wp), dimension(12) :: L +#:else + real(wp), dimension(sys_size) :: L +#:endif #:if not MFC_CASE_OPTIMIZATION and USING_AMD real(wp), dimension(3) :: alpha_rho, dalpha_rho_ds, mf real(wp), dimension(3) :: vel, dvel_ds real(wp), dimension(3) :: adv_local, dadv_ds - real(wp), dimension(12) :: L real(wp), dimension(3) :: dadv_dt real(wp), dimension(3) :: dvel_dt real(wp), dimension(3) :: dalpha_rho_dt real(wp), dimension(10) :: Ys, h_k, dYs_dt, dYs_ds, Xs, Gamma_i, Cp_i #:else - real(wp), dimension(contxe) :: alpha_rho, dalpha_rho_ds, mf + real(wp), dimension(num_fluids) :: alpha_rho, dalpha_rho_ds, mf real(wp), dimension(num_vels) :: vel, dvel_ds real(wp), dimension(num_fluids) :: adv_local, dadv_ds - real(wp), dimension(sys_size) :: L real(wp), dimension(num_fluids) :: dadv_dt real(wp), dimension(num_dims) :: dvel_dt real(wp), dimension(num_fluids) :: dalpha_rho_dt diff --git a/src/simulation/m_compute_cbc.fpp b/src/simulation/m_compute_cbc.fpp index aabd7d726d..aa0dd699f6 100644 --- a/src/simulation/m_compute_cbc.fpp +++ b/src/simulation/m_compute_cbc.fpp @@ -35,11 +35,14 @@ contains !> Fill density L variables subroutine s_fill_density_L(L, lambda_factor, lambda2, c, mf, dalpha_rho_ds, dpres_ds) $:GPU_ROUTINE(parallelism='[seq]') -#:if not MFC_CASE_OPTIMIZATION and USING_AMD +#:if USING_AMD real(wp), dimension(12), intent(inout) :: L - real(wp), dimension(3), intent(in) :: mf, dalpha_rho_ds #:else - real(wp), dimension(sys_size), intent(inout) :: L + real(wp), dimension(sys_size), intent(inout) :: L +#:endif +#:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(3), intent(in) :: mf, dalpha_rho_ds +#:else real(wp), dimension(num_fluids), intent(in) :: mf, dalpha_rho_ds #:endif real(wp), intent(in) :: lambda_factor, lambda2, c @@ -55,11 +58,14 @@ contains !> Fill velocity L variables subroutine s_fill_velocity_L(L, lambda_factor, lambda2, dvel_ds) $:GPU_ROUTINE(parallelism='[seq]') -#:if not MFC_CASE_OPTIMIZATION and USING_AMD +#:if USING_AMD real(wp), dimension(12), intent(inout) :: L - real(wp), dimension(3), intent(in) :: dvel_ds #:else - real(wp), dimension(sys_size), intent(inout) :: L + real(wp), dimension(sys_size), intent(inout) :: L +#:endif +#:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(3), intent(in) :: dvel_ds +#:else real(wp), dimension(num_dims), intent(in) :: dvel_ds #:endif real(wp), intent(in) :: lambda_factor, lambda2 @@ -74,11 +80,14 @@ contains !> Fill advection L variables subroutine s_fill_advection_L(L, lambda_factor, lambda2, dadv_ds) $:GPU_ROUTINE(parallelism='[seq]') -#:if not MFC_CASE_OPTIMIZATION and USING_AMD +#:if USING_AMD real(wp), dimension(12), intent(inout) :: L - real(wp), dimension(3), intent(in) :: dadv_ds #:else - real(wp), dimension(sys_size), intent(inout) :: L + real(wp), dimension(sys_size), intent(inout) :: L +#:endif +#:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(3), intent(in) :: dadv_ds +#:else real(wp), dimension(num_fluids), intent(in) :: dadv_ds #:endif real(wp), intent(in) :: lambda_factor, lambda2 @@ -93,11 +102,14 @@ contains !> Fill chemistry L variables subroutine s_fill_chemistry_L(L, lambda_factor, lambda2, dYs_ds) $:GPU_ROUTINE(parallelism='[seq]') -#:if not MFC_CASE_OPTIMIZATION and USING_AMD +#:if USING_AMD real(wp), dimension(12), intent(inout) :: L - real(wp), dimension(10), intent(in) :: dYs_ds #:else - real(wp), dimension(sys_size), intent(inout) :: L + real(wp), dimension(sys_size), intent(inout) :: L +#:endif +#:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(10), intent(in) :: dYs_ds +#:else real(wp), dimension(num_species), intent(in) :: dYs_ds #:endif real(wp), intent(in) :: lambda_factor, lambda2 @@ -117,11 +129,14 @@ contains & cray_inline=True) real(wp), dimension(3), intent(in) :: lambda -#:if not MFC_CASE_OPTIMIZATION and USING_AMD +#:if USING_AMD real(wp), dimension(12), intent(inout) :: L - real(wp), dimension(3), intent(in) :: dvel_ds #:else - real(wp), dimension(sys_size), intent(inout) :: L + real(wp), dimension(sys_size), intent(inout) :: L +#:endif +#:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(3), intent(in) :: dvel_ds +#:else real(wp), dimension(num_dims), intent(in) :: dvel_ds #:endif real(wp), intent(in) :: rho, c, dpres_ds @@ -138,14 +153,17 @@ contains & parallelism='[seq]', cray_inline=True) real(wp), dimension(3), intent(in) :: lambda -#:if not MFC_CASE_OPTIMIZATION and USING_AMD +#:if USING_AMD real(wp), dimension(12), intent(inout) :: L +#:else + real(wp), dimension(sys_size), intent(inout) :: L +#:endif +#:if not MFC_CASE_OPTIMIZATION and USING_AMD real(wp), dimension(3), intent(in) :: mf, dalpha_rho_ds real(wp), dimension(3), intent(in) :: dvel_ds real(wp), dimension(3), intent(in) :: dadv_ds real(wp), dimension(10), intent(in) :: dYs_ds -#:else - real(wp), dimension(sys_size), intent(inout) :: L +#:else real(wp), dimension(num_fluids), intent(in) :: mf, dalpha_rho_ds real(wp), dimension(num_dims), intent(in) :: dvel_ds real(wp), dimension(num_fluids), intent(in) :: dadv_ds @@ -175,11 +193,14 @@ contains & parallelism='[seq]', cray_inline=True) real(wp), dimension(3), intent(in) :: lambda -#:if not MFC_CASE_OPTIMIZATION and USING_AMD +#:if USING_AMD real(wp), dimension(12), intent(inout) :: L +#:else + real(wp), dimension(sys_size), intent(inout) :: L +#:endif +#:if not MFC_CASE_OPTIMIZATION and USING_AMD real(wp), dimension(3), intent(in) :: dvel_ds -#:else - real(wp), dimension(sys_size), intent(inout) :: L +#:else real(wp), dimension(num_dims), intent(in) :: dvel_ds #:endif real(wp), intent(in) :: rho, c, dpres_ds @@ -196,14 +217,17 @@ contains & parallelism='[seq]', cray_inline=True) real(wp), dimension(3), intent(in) :: lambda -#:if not MFC_CASE_OPTIMIZATION and USING_AMD +#:if USING_AMD real(wp), dimension(12), intent(inout) :: L +#:else + real(wp), dimension(sys_size), intent(inout) :: L +#:endif +#:if not MFC_CASE_OPTIMIZATION and USING_AMD real(wp), dimension(3), intent(in) :: mf, dalpha_rho_ds real(wp), dimension(3), intent(in) :: dvel_ds real(wp), dimension(3), intent(in) :: dadv_ds real(wp), dimension(10), intent(in) :: dYs_ds -#:else - real(wp), dimension(sys_size), intent(inout) :: L +#:else real(wp), dimension(num_fluids), intent(in) :: mf, dalpha_rho_ds real(wp), dimension(num_dims), intent(in) :: dvel_ds real(wp), dimension(num_fluids), intent(in) :: dadv_ds @@ -226,13 +250,16 @@ contains & parallelism='[seq]', cray_inline=True) real(wp), dimension(3), intent(in) :: lambda -#:if not MFC_CASE_OPTIMIZATION and USING_AMD +#:if USING_AMD real(wp), dimension(12), intent(inout) :: L +#:else + real(wp), dimension(sys_size), intent(inout) :: L +#:endif +#:if not MFC_CASE_OPTIMIZATION and USING_AMD real(wp), dimension(3), intent(in) :: mf, dalpha_rho_ds real(wp), dimension(3), intent(in) :: dvel_ds real(wp), dimension(3), intent(in) :: dadv_ds -#:else - real(wp), dimension(sys_size), intent(inout) :: L +#:else real(wp), dimension(num_fluids), intent(in) :: mf, dalpha_rho_ds real(wp), dimension(num_dims), intent(in) :: dvel_ds real(wp), dimension(num_fluids), intent(in) :: dadv_ds @@ -253,13 +280,16 @@ contains & parallelism='[seq]', cray_inline=True) real(wp), dimension(3), intent(in) :: lambda -#:if not MFC_CASE_OPTIMIZATION and USING_AMD +#:if USING_AMD real(wp), dimension(12), intent(inout) :: L +#:else + real(wp), dimension(sys_size), intent(inout) :: L +#:endif +#:if not MFC_CASE_OPTIMIZATION and USING_AMD real(wp), dimension(3), intent(in) :: mf, dalpha_rho_ds real(wp), dimension(3), intent(in) :: dvel_ds real(wp), dimension(3), intent(in) :: dadv_ds -#:else - real(wp), dimension(sys_size), intent(inout) :: L +#:else real(wp), dimension(num_fluids), intent(in) :: mf, dalpha_rho_ds real(wp), dimension(num_dims), intent(in) :: dvel_ds real(wp), dimension(num_fluids), intent(in) :: dadv_ds @@ -279,10 +309,10 @@ contains subroutine s_compute_supersonic_inflow_L(L) $:GPU_ROUTINE(function_name='s_compute_supersonic_inflow_L', & & parallelism='[seq]', cray_inline=True) -#:if not MFC_CASE_OPTIMIZATION and USING_AMD +#:if USING_AMD real(wp), dimension(12), intent(inout) :: L #:else - real(wp), dimension(sys_size), intent(inout) :: L + real(wp), dimension(sys_size), intent(inout) :: L #:endif L(1:advxe) = 0._wp if (chemistry) L(chemxb:chemxe) = 0._wp @@ -294,14 +324,17 @@ contains & parallelism='[seq]', cray_inline=True) real(wp), dimension(3), intent(in) :: lambda -#:if not MFC_CASE_OPTIMIZATION and USING_AMD +#:if USING_AMD real(wp), dimension(12), intent(inout) :: L +#:else + real(wp), dimension(sys_size), intent(inout) :: L +#:endif +#:if not MFC_CASE_OPTIMIZATION and USING_AMD real(wp), dimension(3), intent(in) :: mf, dalpha_rho_ds real(wp), dimension(3), intent(in) :: dvel_ds real(wp), dimension(3), intent(in) :: dadv_ds real(wp), dimension(10), intent(in) :: dYs_ds -#:else - real(wp), dimension(sys_size), intent(inout) :: L +#:else real(wp), dimension(num_fluids), intent(in) :: mf, dalpha_rho_ds real(wp), dimension(num_dims), intent(in) :: dvel_ds real(wp), dimension(num_fluids), intent(in) :: dadv_ds diff --git a/src/simulation/m_hyperelastic.fpp b/src/simulation/m_hyperelastic.fpp index 599ef3302f..52914d9938 100644 --- a/src/simulation/m_hyperelastic.fpp +++ b/src/simulation/m_hyperelastic.fpp @@ -98,11 +98,15 @@ contains type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf -#:if not MFC_CASE_OPTIMIZATION and USING_AMD +#:if USING_AMD real(wp), dimension(9) :: tensora, tensorb - real(wp), dimension(3) :: alpha_k, alpha_rho_k #:else real(wp), dimension(tensor_size) :: tensora, tensorb +#:endif + +#:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(3) :: alpha_k, alpha_rho_k +#:else real(wp), dimension(num_fluids) :: alpha_k, alpha_rho_k #:endif real(wp), dimension(2) :: Re diff --git a/src/simulation/m_qbmm.fpp b/src/simulation/m_qbmm.fpp index 2f2dd46ed5..53596338a2 100644 --- a/src/simulation/m_qbmm.fpp +++ b/src/simulation/m_qbmm.fpp @@ -571,7 +571,7 @@ contains & cray_inline=True) real(wp), intent(in) :: pres, rho, c -#:if not MFC_CASE_OPTIMIZATION and USING_AMD +#:if USING_AMD real(wp), dimension(32, 0:2, 0:2), intent(out) :: coeffs #:else real(wp), dimension(nterms, 0:2, 0:2), intent(out) :: coeffs @@ -650,7 +650,7 @@ contains & cray_inline=True) real(wp), intent(in) :: pres, rho, c -#:if not MFC_CASE_OPTIMIZATION and USING_AMD +#:if USING_AMD real(wp), dimension(32, 0:2, 0:2), intent(out) :: coeffs #:else real(wp), dimension(nterms, 0:2, 0:2), intent(out) :: coeffs @@ -726,11 +726,14 @@ contains #:if not MFC_CASE_OPTIMIZATION and USING_AMD real(wp), dimension(6) :: moms, msum real(wp), dimension(4, 3) :: wght, abscX, abscY, wght_pb, wght_mv, wght_ht, ht - real(wp), dimension(32, 0:2, 0:2) :: coeff #:else real(wp), dimension(nmom) :: moms, msum real(wp), dimension(nnode, nb) :: wght, abscX, abscY, wght_pb, wght_mv, wght_ht, ht - real(wp), dimension(nterms, 0:2, 0:2) :: coeff +#:endif +#:if USING_AMD + real(wp), dimension(32, 0:2, 0:2), intent(out) :: coeff +#:else + real(wp), dimension(nterms, 0:2, 0:2), intent(out) :: coeff #:endif real(wp) :: pres, rho, nbub, c, alf, momsum, drdt, drdt2, chi_vw, x_vw, rho_mw, k_mw, grad_T real(wp) :: n_tait, B_tait @@ -884,7 +887,7 @@ contains $:GPU_ROUTINE(function_name='s_coeff_selector',parallelism='[seq]', & & cray_inline=True) real(wp), intent(in) :: pres, rho, c -#:if not MFC_CASE_OPTIMIZATION and USING_AMD +#:if USING_AMD real(wp), dimension(32, 0:2, 0:2), intent(out) :: coeff #:else real(wp), dimension(nterms, 0:2, 0:2), intent(out) :: coeff diff --git a/src/simulation/m_weno.fpp b/src/simulation/m_weno.fpp index ff6a0e5f93..ed1f5576ae 100644 --- a/src/simulation/m_weno.fpp +++ b/src/simulation/m_weno.fpp @@ -647,7 +647,7 @@ contains integer, intent(in) :: weno_dir type(int_bounds_info), intent(in) :: is1_weno_d, is2_weno_d, is3_weno_d -#:if USING_AMD and not MFC_CASE_OPTIMIZATION +#:if not MFC_CASE_OPTIMIZATION and USING_AMD real(wp), dimension(-3:2) :: dvd real(wp), dimension(0:4) :: poly real(wp), dimension(0:4) :: alpha From f3ee9fa1114eb53ae034708e01d54030b8ea74c7 Mon Sep 17 00:00:00 2001 From: Anand Radhakrishnan Date: Mon, 26 Jan 2026 21:27:39 -0500 Subject: [PATCH 04/21] 1D Dirichlet case fixed --- CMakeLists.txt | 10 ++++------ src/common/m_boundary_common.fpp | 4 ++-- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 2c63582a3e..77f398be7e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -185,11 +185,9 @@ elseif (CMAKE_Fortran_COMPILER_ID STREQUAL "Cray") add_link_options("SHELL: -K trap=fp" "SHELL: -G2") endif() -elseif (CMAKE_Fortran_COMPILER_ID STREQUAL "Flang") +elseif (CMAKE_Fortran_COMPILER_ID STREQUAL "LLVMFlang") add_compile_options( - $<$:-Mfreeform> - $<$:-Mpreprocess> - $<$:-fdefault-real-8> + #$<$:-Wall> ) elseif (CMAKE_Fortran_COMPILER_ID STREQUAL "Intel") add_compile_options($<$:-free>) @@ -529,8 +527,8 @@ function(MFC_SETUP_TARGET) target_compile_options(${a_target} PRIVATE -fopenmp) target_link_options(${a_target} PRIVATE -fopenmp) elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "LLVMFlang") - target_compile_options(${a_target} PRIVATE -fopenmp --offload-arch=gfx90a $ENV{CRAY_MPICH_INC}) - target_link_options(${a_target} PRIVATE -fopenmp --offload-arch=gfx90a $ENV{CRAY_MPICH_LIB}) + target_compile_options(${a_target} PRIVATE -fopenmp --offload-arch=gfx90a $ENV{CRAY_MPICH_INC}) + target_link_options(${a_target} PRIVATE -fopenmp --offload-arch=gfx90a $ENV{CRAY_MPICH_LIB}) endif() endif() diff --git a/src/common/m_boundary_common.fpp b/src/common/m_boundary_common.fpp index 333df31442..8f5412f149 100644 --- a/src/common/m_boundary_common.fpp +++ b/src/common/m_boundary_common.fpp @@ -44,7 +44,7 @@ module m_boundary_common s_populate_grid_variables_buffers, & s_finalize_boundary_common_module - !public :: bc_buffers + public :: bc_buffers #ifdef MFC_MPI public :: MPI_BC_TYPE_TYPE, MPI_BC_BUFFER_TYPE @@ -56,7 +56,7 @@ contains integer :: i, j - @:ALLOCATE(bc_buffers(1:num_dims, 1:2)) + @:ALLOCATE(bc_buffers(1:3, 1:2)) if (bc_io) then @:ALLOCATE(bc_buffers(1, 1)%sf(1:sys_size, 0:n, 0:p)) From fe48823526c176f4822261133a88ab226919e16e Mon Sep 17 00:00:00 2001 From: Anand Radhakrishnan Date: Mon, 26 Jan 2026 22:29:05 -0500 Subject: [PATCH 05/21] cleanup --- src/common/m_phase_change.fpp | 2 + src/common/m_variables_conversion.fpp | 2 +- src/simulation/m_fftw.fpp | 462 +++++++++++++------------- src/simulation/m_qbmm.fpp | 4 +- 4 files changed, 236 insertions(+), 234 deletions(-) diff --git a/src/common/m_phase_change.fpp b/src/common/m_phase_change.fpp index 0ce4e24392..a7742c0532 100644 --- a/src/common/m_phase_change.fpp +++ b/src/common/m_phase_change.fpp @@ -331,12 +331,14 @@ contains end do +#:if not MFC_CASE_OPTIMIZATION and USING_AMD if(num_fluids < 3) then $:GPU_LOOP(parallelism='[seq]') do i = num_fluids+1, 3 p_infpT(i) = p_infpT_sum end do end if +#:endif ! Checking energy constraint if ((rhoe - mQ - minval(p_infpT)) < 0.0_wp) then diff --git a/src/common/m_variables_conversion.fpp b/src/common/m_variables_conversion.fpp index beb38bacf5..bc20350c91 100644 --- a/src/common/m_variables_conversion.fpp +++ b/src/common/m_variables_conversion.fpp @@ -253,7 +253,7 @@ contains real(wp), optional, dimension(2), intent(out) :: Re_K real(wp), optional, intent(out) :: G_K - real(wp), dimension(num_fluids), intent(inout) :: alpha_rho_K, alpha_K !< + real(wp), dimension(num_fluids) :: alpha_rho_K, alpha_K !< real(wp), optional, dimension(num_fluids), intent(in) :: G diff --git a/src/simulation/m_fftw.fpp b/src/simulation/m_fftw.fpp index 411485af10..55f8c8d123 100644 --- a/src/simulation/m_fftw.fpp +++ b/src/simulation/m_fftw.fpp @@ -70,55 +70,55 @@ contains !! applying the Fourier filter in the azimuthal direction. impure subroutine s_initialize_fftw_module - integer :: ierr !< Generic flag used to identify and report GPU errors - - ! Size of input array going into DFT - real_size = p + 1 - ! Size of output array coming out of DFT - cmplx_size = (p + 1)/2 + 1 - - x_size = m + 1 - batch_size = x_size*sys_size - -#if defined(MFC_GPU) - rank = 1; istride = 1; ostride = 1 - - allocate (gpu_fft_size(1:rank), iembed(1:rank), oembed(1:rank)) - - gpu_fft_size(1) = real_size; - iembed(1) = 0 - oembed(1) = 0 - $:GPU_ENTER_DATA(copyin='[real_size,cmplx_size,x_size,sys_size,batch_size,Nfq]') - $:GPU_UPDATE(device='[real_size,cmplx_size,x_size,sys_size,batch_size]') -#else - ! Allocate input and output DFT data sizes - fftw_real_data = fftw_alloc_real(int(real_size, c_size_t)) - fftw_cmplx_data = fftw_alloc_complex(int(cmplx_size, c_size_t)) - fftw_fltr_cmplx_data = fftw_alloc_complex(int(cmplx_size, c_size_t)) - ! Associate input and output data pointers with allocated memory - call c_f_pointer(fftw_real_data, data_real, [real_size]) - call c_f_pointer(fftw_cmplx_data, data_cmplx, [cmplx_size]) - call c_f_pointer(fftw_fltr_cmplx_data, data_fltr_cmplx, [cmplx_size]) - - ! Generate plans for forward and backward DFTs - fwd_plan = fftw_plan_dft_r2c_1d(real_size, data_real, data_cmplx, FFTW_ESTIMATE) - bwd_plan = fftw_plan_dft_c2r_1d(real_size, data_fltr_cmplx, data_real, FFTW_ESTIMATE) -#endif - -#if defined(MFC_GPU) - @:ALLOCATE(data_real_gpu(1:real_size*x_size*sys_size)) - @:ALLOCATE(data_cmplx_gpu(1:cmplx_size*x_size*sys_size)) - @:ALLOCATE(data_fltr_cmplx_gpu(1:cmplx_size*x_size*sys_size)) - -#if defined(__PGI) - ierr = cufftPlanMany(fwd_plan_gpu, rank, gpu_fft_size, iembed, istride, real_size, oembed, ostride, cmplx_size, CUFFT_D2Z, batch_size) - ierr = cufftPlanMany(bwd_plan_gpu, rank, gpu_fft_size, iembed, istride, cmplx_size, oembed, ostride, real_size, CUFFT_Z2D, batch_size) -#else - ierr = hipfftPlanMany(fwd_plan_gpu, rank, gpu_fft_size, iembed, istride, real_size, oembed, ostride, cmplx_size, HIPFFT_D2Z, batch_size) - ierr = hipfftPlanMany(bwd_plan_gpu, rank, gpu_fft_size, iembed, istride, cmplx_size, oembed, ostride, real_size, HIPFFT_Z2D, batch_size) -#endif - -#endif +! integer :: ierr !< Generic flag used to identify and report GPU errors + +! ! Size of input array going into DFT +! real_size = p + 1 +! ! Size of output array coming out of DFT +! cmplx_size = (p + 1)/2 + 1 + +! x_size = m + 1 +! batch_size = x_size*sys_size + +! #if defined(MFC_GPU) +! rank = 1; istride = 1; ostride = 1 + +! allocate (gpu_fft_size(1:rank), iembed(1:rank), oembed(1:rank)) + +! gpu_fft_size(1) = real_size; +! iembed(1) = 0 +! oembed(1) = 0 +! $:GPU_ENTER_DATA(copyin='[real_size,cmplx_size,x_size,sys_size,batch_size,Nfq]') +! $:GPU_UPDATE(device='[real_size,cmplx_size,x_size,sys_size,batch_size]') +! #else +! ! Allocate input and output DFT data sizes +! fftw_real_data = fftw_alloc_real(int(real_size, c_size_t)) +! fftw_cmplx_data = fftw_alloc_complex(int(cmplx_size, c_size_t)) +! fftw_fltr_cmplx_data = fftw_alloc_complex(int(cmplx_size, c_size_t)) +! ! Associate input and output data pointers with allocated memory +! call c_f_pointer(fftw_real_data, data_real, [real_size]) +! call c_f_pointer(fftw_cmplx_data, data_cmplx, [cmplx_size]) +! call c_f_pointer(fftw_fltr_cmplx_data, data_fltr_cmplx, [cmplx_size]) + +! ! Generate plans for forward and backward DFTs +! fwd_plan = fftw_plan_dft_r2c_1d(real_size, data_real, data_cmplx, FFTW_ESTIMATE) +! bwd_plan = fftw_plan_dft_c2r_1d(real_size, data_fltr_cmplx, data_real, FFTW_ESTIMATE) +! #endif + +! #if defined(MFC_GPU) +! @:ALLOCATE(data_real_gpu(1:real_size*x_size*sys_size)) +! @:ALLOCATE(data_cmplx_gpu(1:cmplx_size*x_size*sys_size)) +! @:ALLOCATE(data_fltr_cmplx_gpu(1:cmplx_size*x_size*sys_size)) + +! #if defined(__PGI) +! ierr = cufftPlanMany(fwd_plan_gpu, rank, gpu_fft_size, iembed, istride, real_size, oembed, ostride, cmplx_size, CUFFT_D2Z, batch_size) +! ierr = cufftPlanMany(bwd_plan_gpu, rank, gpu_fft_size, iembed, istride, cmplx_size, oembed, ostride, real_size, CUFFT_Z2D, batch_size) +! #else +! ierr = hipfftPlanMany(fwd_plan_gpu, rank, gpu_fft_size, iembed, istride, real_size, oembed, ostride, cmplx_size, HIPFFT_D2Z, batch_size) +! ierr = hipfftPlanMany(bwd_plan_gpu, rank, gpu_fft_size, iembed, istride, cmplx_size, oembed, ostride, real_size, HIPFFT_Z2D, batch_size) +! #endif + +! #endif end subroutine s_initialize_fftw_module @@ -130,169 +130,169 @@ contains impure subroutine s_apply_fourier_filter(q_cons_vf) type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf - integer :: i, j, k, l !< Generic loop iterators - integer :: ierr !< Generic flag used to identify and report GPU errors - - ! Restrict filter to processors that have cells adjacent to axis - if (bc_y%beg >= 0) return -#if defined(MFC_GPU) - - $:GPU_PARALLEL_LOOP(collapse=3) - do k = 1, sys_size - do j = 0, m - do l = 1, cmplx_size - data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = (0_dp, 0_dp) - end do - end do - end do - $:END_GPU_PARALLEL_LOOP() - - $:GPU_PARALLEL_LOOP(collapse=3) - do k = 1, sys_size - do j = 0, m - do l = 0, p - data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = q_cons_vf(k)%sf(j, 0, l) - end do - end do - end do - $:END_GPU_PARALLEL_LOOP() - - #:call GPU_HOST_DATA(use_device_addr='[data_real_gpu, data_cmplx_gpu, data_fltr_cmplx_gpu]') -#if defined(__PGI) - ierr = cufftExecD2Z(fwd_plan_gpu, data_real_gpu, data_cmplx_gpu) -#else - ierr = hipfftExecD2Z(fwd_plan_gpu, data_real_gpu, data_cmplx_gpu) - call hipCheck(hipDeviceSynchronize()) -#endif - #:endcall GPU_HOST_DATA - Nfq = 3 - $:GPU_UPDATE(device='[Nfq]') - - $:GPU_PARALLEL_LOOP(collapse=3) - do k = 1, sys_size - do j = 0, m - do l = 1, Nfq - data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = data_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) - end do - end do - end do - $:END_GPU_PARALLEL_LOOP() - - #:call GPU_HOST_DATA(use_device_addr='[data_real_gpu, data_cmplx_gpu, data_fltr_cmplx_gpu]') -#if defined(__PGI) - ierr = cufftExecZ2D(bwd_plan_gpu, data_fltr_cmplx_gpu, data_real_gpu) -#else - ierr = hipfftExecZ2D(bwd_plan_gpu, data_fltr_cmplx_gpu, data_real_gpu) - call hipCheck(hipDeviceSynchronize()) -#endif - #:endcall GPU_HOST_DATA - - $:GPU_PARALLEL_LOOP(collapse=3) - do k = 1, sys_size - do j = 0, m - do l = 0, p - data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size)/real(real_size, dp) - q_cons_vf(k)%sf(j, 0, l) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) - end do - end do - end do - $:END_GPU_PARALLEL_LOOP() - - do i = 1, fourier_rings - - $:GPU_PARALLEL_LOOP(collapse=3) - do k = 1, sys_size - do j = 0, m - do l = 1, cmplx_size - data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = (0_dp, 0_dp) - end do - end do - end do - $:END_GPU_PARALLEL_LOOP() - - $:GPU_PARALLEL_LOOP(collapse=3, firstprivate='[i]') - do k = 1, sys_size - do j = 0, m - do l = 0, p - data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = q_cons_vf(k)%sf(j, i, l) - end do - end do - end do - $:END_GPU_PARALLEL_LOOP() - - #:call GPU_HOST_DATA(use_device_addr='[data_real_gpu, data_cmplx_gpu, data_fltr_cmplx_gpu]') -#if defined(__PGI) - ierr = cufftExecD2Z(fwd_plan_gpu, data_real_gpu, data_cmplx_gpu) -#else - ierr = hipfftExecD2Z(fwd_plan_gpu, data_real_gpu, data_cmplx_gpu) - call hipCheck(hipDeviceSynchronize()) -#endif - #:endcall GPU_HOST_DATA - - Nfq = min(floor(2_dp*real(i, dp)*pi), cmplx_size) - $:GPU_UPDATE(device='[Nfq]') - - $:GPU_PARALLEL_LOOP(collapse=3) - do k = 1, sys_size - do j = 0, m - do l = 1, Nfq - data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = data_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) - end do - end do - end do - $:END_GPU_PARALLEL_LOOP() - - #:call GPU_HOST_DATA(use_device_addr='[data_real_gpu, data_cmplx_gpu, data_fltr_cmplx_gpu]') -#if defined(__PGI) - ierr = cufftExecZ2D(bwd_plan_gpu, data_fltr_cmplx_gpu, data_real_gpu) -#else - ierr = hipfftExecZ2D(bwd_plan_gpu, data_fltr_cmplx_gpu, data_real_gpu) - call hipCheck(hipDeviceSynchronize()) -#endif - #:endcall GPU_HOST_DATA - - $:GPU_PARALLEL_LOOP(collapse=3, firstprivate='[i]') - do k = 1, sys_size - do j = 0, m - do l = 0, p - data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size)/real(real_size, dp) - q_cons_vf(k)%sf(j, i, l) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) - end do - end do - end do - $:END_GPU_PARALLEL_LOOP() - end do - -#else - Nfq = 3 - do j = 0, m - do k = 1, sys_size - data_fltr_cmplx(:) = (0_dp, 0_dp) - data_real(1:p + 1) = q_cons_vf(k)%sf(j, 0, 0:p) - call fftw_execute_dft_r2c(fwd_plan, data_real, data_cmplx) - data_fltr_cmplx(1:Nfq) = data_cmplx(1:Nfq) - call fftw_execute_dft_c2r(bwd_plan, data_fltr_cmplx, data_real) - data_real(:) = data_real(:)/real(real_size, dp) - q_cons_vf(k)%sf(j, 0, 0:p) = data_real(1:p + 1) - end do - end do - - ! Apply Fourier filter to additional rings - do i = 1, fourier_rings - Nfq = min(floor(2_dp*real(i, dp)*pi), cmplx_size) - do j = 0, m - do k = 1, sys_size - data_fltr_cmplx(:) = (0_dp, 0_dp) - data_real(1:p + 1) = q_cons_vf(k)%sf(j, i, 0:p) - call fftw_execute_dft_r2c(fwd_plan, data_real, data_cmplx) - data_fltr_cmplx(1:Nfq) = data_cmplx(1:Nfq) - call fftw_execute_dft_c2r(bwd_plan, data_fltr_cmplx, data_real) - data_real(:) = data_real(:)/real(real_size, dp) - q_cons_vf(k)%sf(j, i, 0:p) = data_real(1:p + 1) - end do - end do - end do -#endif +! integer :: i, j, k, l !< Generic loop iterators +! integer :: ierr !< Generic flag used to identify and report GPU errors + +! ! Restrict filter to processors that have cells adjacent to axis +! if (bc_y%beg >= 0) return +! #if defined(MFC_GPU) + +! $:GPU_PARALLEL_LOOP(collapse=3) +! do k = 1, sys_size +! do j = 0, m +! do l = 1, cmplx_size +! data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = (0_dp, 0_dp) +! end do +! end do +! end do +! $:END_GPU_PARALLEL_LOOP() + +! $:GPU_PARALLEL_LOOP(collapse=3) +! do k = 1, sys_size +! do j = 0, m +! do l = 0, p +! data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = q_cons_vf(k)%sf(j, 0, l) +! end do +! end do +! end do +! $:END_GPU_PARALLEL_LOOP() + +! #:call GPU_HOST_DATA(use_device_addr='[data_real_gpu, data_cmplx_gpu, data_fltr_cmplx_gpu]') +! #if defined(__PGI) +! ierr = cufftExecD2Z(fwd_plan_gpu, data_real_gpu, data_cmplx_gpu) +! #else +! ierr = hipfftExecD2Z(fwd_plan_gpu, data_real_gpu, data_cmplx_gpu) +! call hipCheck(hipDeviceSynchronize()) +! #endif +! #:endcall GPU_HOST_DATA +! Nfq = 3 +! $:GPU_UPDATE(device='[Nfq]') + +! $:GPU_PARALLEL_LOOP(collapse=3) +! do k = 1, sys_size +! do j = 0, m +! do l = 1, Nfq +! data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = data_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) +! end do +! end do +! end do +! $:END_GPU_PARALLEL_LOOP() + +! #:call GPU_HOST_DATA(use_device_addr='[data_real_gpu, data_cmplx_gpu, data_fltr_cmplx_gpu]') +! #if defined(__PGI) +! ierr = cufftExecZ2D(bwd_plan_gpu, data_fltr_cmplx_gpu, data_real_gpu) +! #else +! ierr = hipfftExecZ2D(bwd_plan_gpu, data_fltr_cmplx_gpu, data_real_gpu) +! call hipCheck(hipDeviceSynchronize()) +! #endif +! #:endcall GPU_HOST_DATA + +! $:GPU_PARALLEL_LOOP(collapse=3) +! do k = 1, sys_size +! do j = 0, m +! do l = 0, p +! data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size)/real(real_size, dp) +! q_cons_vf(k)%sf(j, 0, l) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) +! end do +! end do +! end do +! $:END_GPU_PARALLEL_LOOP() + +! do i = 1, fourier_rings + +! $:GPU_PARALLEL_LOOP(collapse=3) +! do k = 1, sys_size +! do j = 0, m +! do l = 1, cmplx_size +! data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = (0_dp, 0_dp) +! end do +! end do +! end do +! $:END_GPU_PARALLEL_LOOP() + +! $:GPU_PARALLEL_LOOP(collapse=3, firstprivate='[i]') +! do k = 1, sys_size +! do j = 0, m +! do l = 0, p +! data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = q_cons_vf(k)%sf(j, i, l) +! end do +! end do +! end do +! $:END_GPU_PARALLEL_LOOP() + +! #:call GPU_HOST_DATA(use_device_addr='[data_real_gpu, data_cmplx_gpu, data_fltr_cmplx_gpu]') +! #if defined(__PGI) +! ierr = cufftExecD2Z(fwd_plan_gpu, data_real_gpu, data_cmplx_gpu) +! #else +! ierr = hipfftExecD2Z(fwd_plan_gpu, data_real_gpu, data_cmplx_gpu) +! call hipCheck(hipDeviceSynchronize()) +! #endif +! #:endcall GPU_HOST_DATA + +! Nfq = min(floor(2_dp*real(i, dp)*pi), cmplx_size) +! $:GPU_UPDATE(device='[Nfq]') + +! $:GPU_PARALLEL_LOOP(collapse=3) +! do k = 1, sys_size +! do j = 0, m +! do l = 1, Nfq +! data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = data_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) +! end do +! end do +! end do +! $:END_GPU_PARALLEL_LOOP() + +! #:call GPU_HOST_DATA(use_device_addr='[data_real_gpu, data_cmplx_gpu, data_fltr_cmplx_gpu]') +! #if defined(__PGI) +! ierr = cufftExecZ2D(bwd_plan_gpu, data_fltr_cmplx_gpu, data_real_gpu) +! #else +! ierr = hipfftExecZ2D(bwd_plan_gpu, data_fltr_cmplx_gpu, data_real_gpu) +! call hipCheck(hipDeviceSynchronize()) +! #endif +! #:endcall GPU_HOST_DATA + +! $:GPU_PARALLEL_LOOP(collapse=3, firstprivate='[i]') +! do k = 1, sys_size +! do j = 0, m +! do l = 0, p +! data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size)/real(real_size, dp) +! q_cons_vf(k)%sf(j, i, l) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) +! end do +! end do +! end do +! $:END_GPU_PARALLEL_LOOP() +! end do + +! #else +! Nfq = 3 +! do j = 0, m +! do k = 1, sys_size +! data_fltr_cmplx(:) = (0_dp, 0_dp) +! data_real(1:p + 1) = q_cons_vf(k)%sf(j, 0, 0:p) +! call fftw_execute_dft_r2c(fwd_plan, data_real, data_cmplx) +! data_fltr_cmplx(1:Nfq) = data_cmplx(1:Nfq) +! call fftw_execute_dft_c2r(bwd_plan, data_fltr_cmplx, data_real) +! data_real(:) = data_real(:)/real(real_size, dp) +! q_cons_vf(k)%sf(j, 0, 0:p) = data_real(1:p + 1) +! end do +! end do + +! ! Apply Fourier filter to additional rings +! do i = 1, fourier_rings +! Nfq = min(floor(2_dp*real(i, dp)*pi), cmplx_size) +! do j = 0, m +! do k = 1, sys_size +! data_fltr_cmplx(:) = (0_dp, 0_dp) +! data_real(1:p + 1) = q_cons_vf(k)%sf(j, i, 0:p) +! call fftw_execute_dft_r2c(fwd_plan, data_real, data_cmplx) +! data_fltr_cmplx(1:Nfq) = data_cmplx(1:Nfq) +! call fftw_execute_dft_c2r(bwd_plan, data_fltr_cmplx, data_real) +! data_real(:) = data_real(:)/real(real_size, dp) +! q_cons_vf(k)%sf(j, i, 0:p) = data_real(1:p + 1) +! end do +! end do +! end do +! #endif end subroutine s_apply_fourier_filter @@ -301,25 +301,25 @@ contains !! applying the Fourier filter in the azimuthal direction. impure subroutine s_finalize_fftw_module -#if defined(MFC_GPU) - integer :: ierr !< Generic flag used to identify and report GPU errors - @:DEALLOCATE(data_real_gpu, data_fltr_cmplx_gpu, data_cmplx_gpu) -#if defined(__PGI) - - ierr = cufftDestroy(fwd_plan_gpu) - ierr = cufftDestroy(bwd_plan_gpu) -#else - ierr = hipfftDestroy(fwd_plan_gpu) - ierr = hipfftDestroy(bwd_plan_gpu) -#endif -#else - call fftw_free(fftw_real_data) - call fftw_free(fftw_cmplx_data) - call fftw_free(fftw_fltr_cmplx_data) - - call fftw_destroy_plan(fwd_plan) - call fftw_destroy_plan(bwd_plan) -#endif +! #if defined(MFC_GPU) +! integer :: ierr !< Generic flag used to identify and report GPU errors +! @:DEALLOCATE(data_real_gpu, data_fltr_cmplx_gpu, data_cmplx_gpu) +! #if defined(__PGI) + +! ierr = cufftDestroy(fwd_plan_gpu) +! ierr = cufftDestroy(bwd_plan_gpu) +! #else +! ierr = hipfftDestroy(fwd_plan_gpu) +! ierr = hipfftDestroy(bwd_plan_gpu) +! #endif +! #else +! call fftw_free(fftw_real_data) +! call fftw_free(fftw_cmplx_data) +! call fftw_free(fftw_fltr_cmplx_data) + +! call fftw_destroy_plan(fwd_plan) +! call fftw_destroy_plan(bwd_plan) +! #endif end subroutine s_finalize_fftw_module end module m_fftw diff --git a/src/simulation/m_qbmm.fpp b/src/simulation/m_qbmm.fpp index 53596338a2..4eae61eec9 100644 --- a/src/simulation/m_qbmm.fpp +++ b/src/simulation/m_qbmm.fpp @@ -731,9 +731,9 @@ contains real(wp), dimension(nnode, nb) :: wght, abscX, abscY, wght_pb, wght_mv, wght_ht, ht #:endif #:if USING_AMD - real(wp), dimension(32, 0:2, 0:2), intent(out) :: coeff + real(wp), dimension(32, 0:2, 0:2) :: coeff #:else - real(wp), dimension(nterms, 0:2, 0:2), intent(out) :: coeff + real(wp), dimension(nterms, 0:2, 0:2) :: coeff #:endif real(wp) :: pres, rho, nbub, c, alf, momsum, drdt, drdt2, chi_vw, x_vw, rho_mw, k_mw, grad_T real(wp) :: n_tait, B_tait From fc6e65bdf0a361c3742def15253351805fd5c02f Mon Sep 17 00:00:00 2001 From: Anand Radhakrishnan Date: Tue, 27 Jan 2026 01:44:29 -0500 Subject: [PATCH 06/21] Phase Change fixed (539/546 cases work) --- src/common/m_phase_change.fpp | 268 ++++++++------------ src/simulation/m_fftw.fpp | 462 +++++++++++++++++----------------- 2 files changed, 337 insertions(+), 393 deletions(-) diff --git a/src/common/m_phase_change.fpp b/src/common/m_phase_change.fpp index a7742c0532..b1505a44eb 100644 --- a/src/common/m_phase_change.fpp +++ b/src/common/m_phase_change.fpp @@ -434,6 +434,7 @@ contains real(wp), dimension(2) :: R2D, DeltamP !< residual and correction array real(wp) :: Om ! underrelaxation factor real(wp) :: mCP, mCPD, mCVGP, mCVGP2, mQ, mQD ! auxiliary variables for the pTg-solver + real(wp) :: ml, mT, dFdT, dTdm, dTdp !< Generic loop iterators integer :: i, ns @@ -506,11 +507,85 @@ contains end do ! calculating the (2D) Jacobian Matrix used in the solution of the pTg-quilibrium model - call s_compute_jacobian_matrix(InvJac, j, Jac, k, l, mCPD, mCVGP, mCVGP2, pS, q_cons_vf, TJac) + + ! mass of the reacting liquid + ml = q_cons_vf(lp + contxb - 1)%sf(j, k, l) + + ! mass of the two participating fluids + mT = q_cons_vf(lp + contxb - 1)%sf(j, k, l) & + + q_cons_vf(vp + contxb - 1)%sf(j, k, l) + + TS = 1/(mT*cvs(vp)*(gs_min(vp) - 1)/(pS + ps_inf(vp)) & + + ml*(cvs(lp)*(gs_min(lp) - 1)/(pS + ps_inf(lp)) & + - cvs(vp)*(gs_min(vp) - 1)/(pS + ps_inf(vp))) & + + mCVGP) + + dFdT = & + -(cvs(lp)*gs_min(lp) - cvs(vp)*gs_min(vp))*log(TS) & + - (qvps(lp) - qvps(vp)) & + + cvs(lp)*(gs_min(lp) - 1)*log(pS + ps_inf(lp)) & + - cvs(vp)*(gs_min(vp) - 1)*log(pS + ps_inf(vp)) + + dTdm = -(cvs(lp)*(gs_min(lp) - 1)/(pS + ps_inf(lp)) & + - cvs(vp)*(gs_min(vp) - 1)/(pS + ps_inf(vp)))*TS**2 + + dTdp = (mT*cvs(vp)*(gs_min(vp) - 1)/(pS + ps_inf(vp))**2 & + + ml*(cvs(lp)*(gs_min(lp) - 1)/(pS + ps_inf(lp))**2 & + - cvs(vp)*(gs_min(vp) - 1)/(pS + ps_inf(vp))**2) & + + mCVGP2)*TS**2 + + ! F = (F1,F2) is the function whose roots we are looking for + ! x = (m1, p) are the independent variables. m1 = mass of the first participant fluid, p = pressure + ! F1 = 0 is the Gibbs free energy quality + ! F2 = 0 is the enforcement of the thermodynamic (total - kinectic) energy + ! dF1dm + Jac(1, 1) = dFdT*dTdm + + ! dF1dp + Jac(1, 2) = dFdT*dTdp + TS & + *(cvs(lp)*(gs_min(lp) - 1)/(pS + ps_inf(lp)) & + - cvs(vp)*(gs_min(vp) - 1)/(pS + ps_inf(vp))) + + ! dF2dm + Jac(2, 1) = (qvs(vp) - qvs(lp) & + + (cvs(vp)*gs_min(vp) - cvs(lp)*gs_min(lp)) & + /(ml*(cvs(lp)*(gs_min(lp) - 1)/(pS + ps_inf(lp)) & + - cvs(vp)*(gs_min(vp) - 1)/(pS + ps_inf(vp))) & + + mT*cvs(vp)*(gs_min(vp) - 1)/(pS + ps_inf(vp)) + mCVGP) & + - (ml*(cvs(vp)*gs_min(vp) - cvs(lp)*gs_min(lp)) & + - mT*cvs(vp)*gs_min(vp) - mCPD) & + *(cvs(lp)*(gs_min(lp) - 1)/(pS + ps_inf(lp)) & + - cvs(vp)*(gs_min(vp) - 1)/(pS + ps_inf(vp))) & + /((ml*(cvs(lp)*(gs_min(lp) - 1)/(pS + ps_inf(lp)) & + - cvs(vp)*(gs_min(vp) - 1)/(pS + ps_inf(vp))) & + + mT*cvs(vp)*(gs_min(vp) - 1)/(pS + ps_inf(vp)) + mCVGP)**2))/1 + ! dF2dp + Jac(2, 2) = (1 + (ml*(cvs(vp)*gs_min(vp) - cvs(lp)*gs_min(lp)) & + - mT*cvs(vp)*gs_min(vp) - mCPD) & + *(ml*(cvs(lp)*(gs_min(lp) - 1)/(pS + ps_inf(lp))**2 & + - cvs(vp)*(gs_min(vp) - 1)/(pS + ps_inf(vp))**2) & + + mT*cvs(vp)*(gs_min(vp) - 1)/(pS + ps_inf(vp))**2 + mCVGP2) & + /(ml*(cvs(lp)*(gs_min(lp) - 1)/(pS + ps_inf(lp)) & + - cvs(vp)*(gs_min(vp) - 1)/(pS + ps_inf(vp))) & + + mT*cvs(vp)*(gs_min(vp) - 1)/(pS + ps_inf(vp)) + mCVGP)**2)/1 + + ! intermediate elements of J^{-1} + InvJac(1, 1) = Jac(2, 2) + InvJac(1, 2) = -1.0_wp*Jac(1, 2) + InvJac(2, 1) = -1.0_wp*Jac(2, 1) + InvJac(2, 2) = Jac(1, 1) + + ! elements of J^{T} + TJac(1, 1) = Jac(1, 1) + TJac(1, 2) = Jac(2, 1) + TJac(2, 1) = Jac(1, 2) + TJac(2, 2) = Jac(2, 2) + + ! dividing by det(J) + InvJac = InvJac / (Jac(1, 1)*Jac(2, 2) - Jac(1, 2)*Jac(2, 1)) ! calculating correction array for Newton's method - DeltamP(1) = -1.0_wp*(InvJac(1,1)*R2D(1) + InvJac(1,2)*R2D(2)) - DeltamP(2) = -1.0_wp*(InvJac(2,1)*R2D(1) + InvJac(2,2)*R2D(2)) + DeltamP = -1.0_wp*(matmul(InvJac, R2D)) ! updating two reacting 'masses'. Recall that inert 'masses' do not change during the phase change ! liquid @@ -524,7 +599,34 @@ contains ! calculating residuals, which are (i) the difference between the Gibbs Free energy of the gas and the liquid ! and (ii) the energy before and after the phase-change process. - call s_compute_pTg_residue(j, k, l, mCPD, mCVGP, mQD, q_cons_vf, pS, rhoe, R2D) + + ! mass of the reacting liquid + ml = q_cons_vf(lp + contxb - 1)%sf(j, k, l) + + ! mass of the two participating fluids + mT = q_cons_vf(lp + contxb - 1)%sf(j, k, l) & + + q_cons_vf(vp + contxb - 1)%sf(j, k, l) + + TS = 1/(mT*cvs(vp)*(gs_min(vp) - 1)/(pS + ps_inf(vp)) & + + ml*(cvs(lp)*(gs_min(lp) - 1)/(pS + ps_inf(lp)) & + - cvs(vp)*(gs_min(vp) - 1)/(pS + ps_inf(vp))) & + + mCVGP) + + ! Gibbs Free Energy Equality condition (DG) + R2D(1) = TS*((cvs(lp)*gs_min(lp) - cvs(vp)*gs_min(vp)) & + *(1 - log(TS)) - (qvps(lp) - qvps(vp)) & + + cvs(lp)*(gs_min(lp) - 1)*log(pS + ps_inf(lp)) & + - cvs(vp)*(gs_min(vp) - 1)*log(pS + ps_inf(vp))) & + + qvs(lp) - qvs(vp) + + ! Constant Energy Process condition (DE) + R2D(2) = (rhoe + pS & + + ml*(qvs(vp) - qvs(lp)) - mT*qvs(vp) - mQD & + + (ml*(gs_min(vp)*cvs(vp) - gs_min(lp)*cvs(lp)) & + - mT*gs_min(vp)*cvs(vp) - mCPD) & + /(ml*(cvs(lp)*(gs_min(lp) - 1)/(pS + ps_inf(lp)) & + - cvs(vp)*(gs_min(vp) - 1)/(pS + ps_inf(vp))) & + + mT*cvs(vp)*(gs_min(vp) - 1)/(pS + ps_inf(vp)) + mCVGP))/1 end do @@ -586,164 +688,6 @@ contains end if end subroutine s_correct_partial_densities - !> This auxiliary subroutine calculates the 2 x 2 Jacobian and, its inverse and transpose - !! to be used in the pTg-equilibirium procedure - !! @param InvJac Inverse of the Jacobian Matrix - !! @param j generic loop iterator for x direction - !! @param Jac Jacobian Matrix - !! @param k generic loop iterator for y direction - !! @param l generic loop iterator for z direction - !! @param mCPD sum of the total alpha*rho*cp - !! @param mCVGP auxiliary variable for the calculation of the matrices: alpha*rho*cv*(g-1)/press - !! @param mCVGP2 auxiliary variable for the calculation of the matrices: alpha*rho*cv*(g-1)/press^2 - !! @param pS equilibrium pressure at the interface - !! @param q_cons_vf Cell-average conservative variables - !! @param TJac Transpose of the Jacobian Matrix - subroutine s_compute_jacobian_matrix(InvJac, j, Jac, k, l, mCPD, mCVGP, mCVGP2, pS, q_cons_vf, TJac) - $:GPU_ROUTINE(function_name='s_compute_jacobian_matrix', & - & parallelism='[seq]', cray_inline=True) - - real(wp), dimension(2, 2), intent(out) :: InvJac - integer, intent(in) :: j - real(wp), dimension(2, 2), intent(out) :: Jac - integer, intent(in) :: k, l - real(wp), intent(in) :: mCPD, mCVGP, mCVGP2, pS - type(scalar_field), dimension(sys_size), intent(in) :: q_cons_vf - real(wp), dimension(2, 2), intent(out) :: TJac - - real(wp) :: ml, mT, TS, dFdT, dTdm, dTdp ! mass of the reacting fluid, total reacting mass, and auxiliary variables - - ! mass of the reacting liquid - ml = q_cons_vf(lp + contxb - 1)%sf(j, k, l) - - ! mass of the two participating fluids - mT = q_cons_vf(lp + contxb - 1)%sf(j, k, l) & - + q_cons_vf(vp + contxb - 1)%sf(j, k, l) - - TS = 1/(mT*cvs(vp)*(gs_min(vp) - 1)/(pS + ps_inf(vp)) & - + ml*(cvs(lp)*(gs_min(lp) - 1)/(pS + ps_inf(lp)) & - - cvs(vp)*(gs_min(vp) - 1)/(pS + ps_inf(vp))) & - + mCVGP) - - dFdT = & - -(cvs(lp)*gs_min(lp) - cvs(vp)*gs_min(vp))*log(TS) & - - (qvps(lp) - qvps(vp)) & - + cvs(lp)*(gs_min(lp) - 1)*log(pS + ps_inf(lp)) & - - cvs(vp)*(gs_min(vp) - 1)*log(pS + ps_inf(vp)) - - dTdm = -(cvs(lp)*(gs_min(lp) - 1)/(pS + ps_inf(lp)) & - - cvs(vp)*(gs_min(vp) - 1)/(pS + ps_inf(vp)))*TS**2 - - dTdp = (mT*cvs(vp)*(gs_min(vp) - 1)/(pS + ps_inf(vp))**2 & - + ml*(cvs(lp)*(gs_min(lp) - 1)/(pS + ps_inf(lp))**2 & - - cvs(vp)*(gs_min(vp) - 1)/(pS + ps_inf(vp))**2) & - + mCVGP2)*TS**2 - - ! F = (F1,F2) is the function whose roots we are looking for - ! x = (m1, p) are the independent variables. m1 = mass of the first participant fluid, p = pressure - ! F1 = 0 is the Gibbs free energy quality - ! F2 = 0 is the enforcement of the thermodynamic (total - kinectic) energy - ! dF1dm - Jac(1, 1) = dFdT*dTdm - - ! dF1dp - Jac(1, 2) = dFdT*dTdp + TS & - *(cvs(lp)*(gs_min(lp) - 1)/(pS + ps_inf(lp)) & - - cvs(vp)*(gs_min(vp) - 1)/(pS + ps_inf(vp))) - - ! dF2dm - Jac(2, 1) = (qvs(vp) - qvs(lp) & - + (cvs(vp)*gs_min(vp) - cvs(lp)*gs_min(lp)) & - /(ml*(cvs(lp)*(gs_min(lp) - 1)/(pS + ps_inf(lp)) & - - cvs(vp)*(gs_min(vp) - 1)/(pS + ps_inf(vp))) & - + mT*cvs(vp)*(gs_min(vp) - 1)/(pS + ps_inf(vp)) + mCVGP) & - - (ml*(cvs(vp)*gs_min(vp) - cvs(lp)*gs_min(lp)) & - - mT*cvs(vp)*gs_min(vp) - mCPD) & - *(cvs(lp)*(gs_min(lp) - 1)/(pS + ps_inf(lp)) & - - cvs(vp)*(gs_min(vp) - 1)/(pS + ps_inf(vp))) & - /((ml*(cvs(lp)*(gs_min(lp) - 1)/(pS + ps_inf(lp)) & - - cvs(vp)*(gs_min(vp) - 1)/(pS + ps_inf(vp))) & - + mT*cvs(vp)*(gs_min(vp) - 1)/(pS + ps_inf(vp)) + mCVGP)**2))/1 - ! dF2dp - Jac(2, 2) = (1 + (ml*(cvs(vp)*gs_min(vp) - cvs(lp)*gs_min(lp)) & - - mT*cvs(vp)*gs_min(vp) - mCPD) & - *(ml*(cvs(lp)*(gs_min(lp) - 1)/(pS + ps_inf(lp))**2 & - - cvs(vp)*(gs_min(vp) - 1)/(pS + ps_inf(vp))**2) & - + mT*cvs(vp)*(gs_min(vp) - 1)/(pS + ps_inf(vp))**2 + mCVGP2) & - /(ml*(cvs(lp)*(gs_min(lp) - 1)/(pS + ps_inf(lp)) & - - cvs(vp)*(gs_min(vp) - 1)/(pS + ps_inf(vp))) & - + mT*cvs(vp)*(gs_min(vp) - 1)/(pS + ps_inf(vp)) + mCVGP)**2)/1 - - ! intermediate elements of J^{-1} - InvJac(1, 1) = Jac(2, 2) - InvJac(1, 2) = -1.0_wp*Jac(1, 2) - InvJac(2, 1) = -1.0_wp*Jac(2, 1) - InvJac(2, 2) = Jac(1, 1) - - ! elements of J^{T} - TJac(1, 1) = Jac(1, 1) - TJac(1, 2) = Jac(2, 1) - TJac(2, 1) = Jac(1, 2) - TJac(2, 2) = Jac(2, 2) - - ! dividing by det(J) - InvJac = InvJac/(Jac(1, 1)*Jac(2, 2) - Jac(1, 2)*Jac(2, 1)) - - end subroutine s_compute_jacobian_matrix - - !> This auxiliary subroutine computes the residue of the pTg-equilibrium procedure - !! @param j generic loop iterator for x direction - !! @param k generic loop iterator for y direction - !! @param l generic loop iterator for z direction - !! @param mCPD sum of the total alpha*rho*cp - !! @param mCVGP auxiliary variable for the calculation of the matrices: alpha*rho*cv*(g-1)/press - !! @param mQD sum of the total alpha*rho*qv - !! @param q_cons_vf Cell-average conservative variables - !! @param pS equilibrium pressure at the interface - !! @param rhoe mixture energy - !! @param R2D (2D) residue array - subroutine s_compute_pTg_residue(j, k, l, mCPD, mCVGP, mQD, q_cons_vf, pS, rhoe, R2D) - $:GPU_ROUTINE(function_name='s_compute_pTg_residue', & - & parallelism='[seq]', cray_inline=True) - - integer, intent(in) :: j, k, l - real(wp), intent(in) :: mCPD, mCVGP, mQD - type(scalar_field), dimension(sys_size), intent(in) :: q_cons_vf - real(wp), intent(in) :: pS, rhoe - real(wp), dimension(2), intent(out) :: R2D - - real(wp) :: ml, mT, TS !< mass of the reacting liquid, total reacting mass, equilibrium temperature - - ! mass of the reacting liquid - ml = q_cons_vf(lp + contxb - 1)%sf(j, k, l) - - ! mass of the two participating fluids - mT = q_cons_vf(lp + contxb - 1)%sf(j, k, l) & - + q_cons_vf(vp + contxb - 1)%sf(j, k, l) - - TS = 1/(mT*cvs(vp)*(gs_min(vp) - 1)/(pS + ps_inf(vp)) & - + ml*(cvs(lp)*(gs_min(lp) - 1)/(pS + ps_inf(lp)) & - - cvs(vp)*(gs_min(vp) - 1)/(pS + ps_inf(vp))) & - + mCVGP) - - ! Gibbs Free Energy Equality condition (DG) - R2D(1) = TS*((cvs(lp)*gs_min(lp) - cvs(vp)*gs_min(vp)) & - *(1 - log(TS)) - (qvps(lp) - qvps(vp)) & - + cvs(lp)*(gs_min(lp) - 1)*log(pS + ps_inf(lp)) & - - cvs(vp)*(gs_min(vp) - 1)*log(pS + ps_inf(vp))) & - + qvs(lp) - qvs(vp) - - ! Constant Energy Process condition (DE) - R2D(2) = (rhoe + pS & - + ml*(qvs(vp) - qvs(lp)) - mT*qvs(vp) - mQD & - + (ml*(gs_min(vp)*cvs(vp) - gs_min(lp)*cvs(lp)) & - - mT*gs_min(vp)*cvs(vp) - mCPD) & - /(ml*(cvs(lp)*(gs_min(lp) - 1)/(pS + ps_inf(lp)) & - - cvs(vp)*(gs_min(vp) - 1)/(pS + ps_inf(vp))) & - + mT*cvs(vp)*(gs_min(vp) - 1)/(pS + ps_inf(vp)) + mCVGP))/1 - - end subroutine s_compute_pTg_residue - !> This auxiliary subroutine finds the Saturation temperature for a given !! saturation pressure through a newton solver !! @param pSat Saturation Pressure diff --git a/src/simulation/m_fftw.fpp b/src/simulation/m_fftw.fpp index 55f8c8d123..411485af10 100644 --- a/src/simulation/m_fftw.fpp +++ b/src/simulation/m_fftw.fpp @@ -70,55 +70,55 @@ contains !! applying the Fourier filter in the azimuthal direction. impure subroutine s_initialize_fftw_module -! integer :: ierr !< Generic flag used to identify and report GPU errors - -! ! Size of input array going into DFT -! real_size = p + 1 -! ! Size of output array coming out of DFT -! cmplx_size = (p + 1)/2 + 1 - -! x_size = m + 1 -! batch_size = x_size*sys_size - -! #if defined(MFC_GPU) -! rank = 1; istride = 1; ostride = 1 - -! allocate (gpu_fft_size(1:rank), iembed(1:rank), oembed(1:rank)) - -! gpu_fft_size(1) = real_size; -! iembed(1) = 0 -! oembed(1) = 0 -! $:GPU_ENTER_DATA(copyin='[real_size,cmplx_size,x_size,sys_size,batch_size,Nfq]') -! $:GPU_UPDATE(device='[real_size,cmplx_size,x_size,sys_size,batch_size]') -! #else -! ! Allocate input and output DFT data sizes -! fftw_real_data = fftw_alloc_real(int(real_size, c_size_t)) -! fftw_cmplx_data = fftw_alloc_complex(int(cmplx_size, c_size_t)) -! fftw_fltr_cmplx_data = fftw_alloc_complex(int(cmplx_size, c_size_t)) -! ! Associate input and output data pointers with allocated memory -! call c_f_pointer(fftw_real_data, data_real, [real_size]) -! call c_f_pointer(fftw_cmplx_data, data_cmplx, [cmplx_size]) -! call c_f_pointer(fftw_fltr_cmplx_data, data_fltr_cmplx, [cmplx_size]) - -! ! Generate plans for forward and backward DFTs -! fwd_plan = fftw_plan_dft_r2c_1d(real_size, data_real, data_cmplx, FFTW_ESTIMATE) -! bwd_plan = fftw_plan_dft_c2r_1d(real_size, data_fltr_cmplx, data_real, FFTW_ESTIMATE) -! #endif - -! #if defined(MFC_GPU) -! @:ALLOCATE(data_real_gpu(1:real_size*x_size*sys_size)) -! @:ALLOCATE(data_cmplx_gpu(1:cmplx_size*x_size*sys_size)) -! @:ALLOCATE(data_fltr_cmplx_gpu(1:cmplx_size*x_size*sys_size)) - -! #if defined(__PGI) -! ierr = cufftPlanMany(fwd_plan_gpu, rank, gpu_fft_size, iembed, istride, real_size, oembed, ostride, cmplx_size, CUFFT_D2Z, batch_size) -! ierr = cufftPlanMany(bwd_plan_gpu, rank, gpu_fft_size, iembed, istride, cmplx_size, oembed, ostride, real_size, CUFFT_Z2D, batch_size) -! #else -! ierr = hipfftPlanMany(fwd_plan_gpu, rank, gpu_fft_size, iembed, istride, real_size, oembed, ostride, cmplx_size, HIPFFT_D2Z, batch_size) -! ierr = hipfftPlanMany(bwd_plan_gpu, rank, gpu_fft_size, iembed, istride, cmplx_size, oembed, ostride, real_size, HIPFFT_Z2D, batch_size) -! #endif - -! #endif + integer :: ierr !< Generic flag used to identify and report GPU errors + + ! Size of input array going into DFT + real_size = p + 1 + ! Size of output array coming out of DFT + cmplx_size = (p + 1)/2 + 1 + + x_size = m + 1 + batch_size = x_size*sys_size + +#if defined(MFC_GPU) + rank = 1; istride = 1; ostride = 1 + + allocate (gpu_fft_size(1:rank), iembed(1:rank), oembed(1:rank)) + + gpu_fft_size(1) = real_size; + iembed(1) = 0 + oembed(1) = 0 + $:GPU_ENTER_DATA(copyin='[real_size,cmplx_size,x_size,sys_size,batch_size,Nfq]') + $:GPU_UPDATE(device='[real_size,cmplx_size,x_size,sys_size,batch_size]') +#else + ! Allocate input and output DFT data sizes + fftw_real_data = fftw_alloc_real(int(real_size, c_size_t)) + fftw_cmplx_data = fftw_alloc_complex(int(cmplx_size, c_size_t)) + fftw_fltr_cmplx_data = fftw_alloc_complex(int(cmplx_size, c_size_t)) + ! Associate input and output data pointers with allocated memory + call c_f_pointer(fftw_real_data, data_real, [real_size]) + call c_f_pointer(fftw_cmplx_data, data_cmplx, [cmplx_size]) + call c_f_pointer(fftw_fltr_cmplx_data, data_fltr_cmplx, [cmplx_size]) + + ! Generate plans for forward and backward DFTs + fwd_plan = fftw_plan_dft_r2c_1d(real_size, data_real, data_cmplx, FFTW_ESTIMATE) + bwd_plan = fftw_plan_dft_c2r_1d(real_size, data_fltr_cmplx, data_real, FFTW_ESTIMATE) +#endif + +#if defined(MFC_GPU) + @:ALLOCATE(data_real_gpu(1:real_size*x_size*sys_size)) + @:ALLOCATE(data_cmplx_gpu(1:cmplx_size*x_size*sys_size)) + @:ALLOCATE(data_fltr_cmplx_gpu(1:cmplx_size*x_size*sys_size)) + +#if defined(__PGI) + ierr = cufftPlanMany(fwd_plan_gpu, rank, gpu_fft_size, iembed, istride, real_size, oembed, ostride, cmplx_size, CUFFT_D2Z, batch_size) + ierr = cufftPlanMany(bwd_plan_gpu, rank, gpu_fft_size, iembed, istride, cmplx_size, oembed, ostride, real_size, CUFFT_Z2D, batch_size) +#else + ierr = hipfftPlanMany(fwd_plan_gpu, rank, gpu_fft_size, iembed, istride, real_size, oembed, ostride, cmplx_size, HIPFFT_D2Z, batch_size) + ierr = hipfftPlanMany(bwd_plan_gpu, rank, gpu_fft_size, iembed, istride, cmplx_size, oembed, ostride, real_size, HIPFFT_Z2D, batch_size) +#endif + +#endif end subroutine s_initialize_fftw_module @@ -130,169 +130,169 @@ contains impure subroutine s_apply_fourier_filter(q_cons_vf) type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf -! integer :: i, j, k, l !< Generic loop iterators -! integer :: ierr !< Generic flag used to identify and report GPU errors - -! ! Restrict filter to processors that have cells adjacent to axis -! if (bc_y%beg >= 0) return -! #if defined(MFC_GPU) - -! $:GPU_PARALLEL_LOOP(collapse=3) -! do k = 1, sys_size -! do j = 0, m -! do l = 1, cmplx_size -! data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = (0_dp, 0_dp) -! end do -! end do -! end do -! $:END_GPU_PARALLEL_LOOP() - -! $:GPU_PARALLEL_LOOP(collapse=3) -! do k = 1, sys_size -! do j = 0, m -! do l = 0, p -! data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = q_cons_vf(k)%sf(j, 0, l) -! end do -! end do -! end do -! $:END_GPU_PARALLEL_LOOP() - -! #:call GPU_HOST_DATA(use_device_addr='[data_real_gpu, data_cmplx_gpu, data_fltr_cmplx_gpu]') -! #if defined(__PGI) -! ierr = cufftExecD2Z(fwd_plan_gpu, data_real_gpu, data_cmplx_gpu) -! #else -! ierr = hipfftExecD2Z(fwd_plan_gpu, data_real_gpu, data_cmplx_gpu) -! call hipCheck(hipDeviceSynchronize()) -! #endif -! #:endcall GPU_HOST_DATA -! Nfq = 3 -! $:GPU_UPDATE(device='[Nfq]') - -! $:GPU_PARALLEL_LOOP(collapse=3) -! do k = 1, sys_size -! do j = 0, m -! do l = 1, Nfq -! data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = data_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) -! end do -! end do -! end do -! $:END_GPU_PARALLEL_LOOP() - -! #:call GPU_HOST_DATA(use_device_addr='[data_real_gpu, data_cmplx_gpu, data_fltr_cmplx_gpu]') -! #if defined(__PGI) -! ierr = cufftExecZ2D(bwd_plan_gpu, data_fltr_cmplx_gpu, data_real_gpu) -! #else -! ierr = hipfftExecZ2D(bwd_plan_gpu, data_fltr_cmplx_gpu, data_real_gpu) -! call hipCheck(hipDeviceSynchronize()) -! #endif -! #:endcall GPU_HOST_DATA - -! $:GPU_PARALLEL_LOOP(collapse=3) -! do k = 1, sys_size -! do j = 0, m -! do l = 0, p -! data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size)/real(real_size, dp) -! q_cons_vf(k)%sf(j, 0, l) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) -! end do -! end do -! end do -! $:END_GPU_PARALLEL_LOOP() - -! do i = 1, fourier_rings - -! $:GPU_PARALLEL_LOOP(collapse=3) -! do k = 1, sys_size -! do j = 0, m -! do l = 1, cmplx_size -! data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = (0_dp, 0_dp) -! end do -! end do -! end do -! $:END_GPU_PARALLEL_LOOP() - -! $:GPU_PARALLEL_LOOP(collapse=3, firstprivate='[i]') -! do k = 1, sys_size -! do j = 0, m -! do l = 0, p -! data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = q_cons_vf(k)%sf(j, i, l) -! end do -! end do -! end do -! $:END_GPU_PARALLEL_LOOP() - -! #:call GPU_HOST_DATA(use_device_addr='[data_real_gpu, data_cmplx_gpu, data_fltr_cmplx_gpu]') -! #if defined(__PGI) -! ierr = cufftExecD2Z(fwd_plan_gpu, data_real_gpu, data_cmplx_gpu) -! #else -! ierr = hipfftExecD2Z(fwd_plan_gpu, data_real_gpu, data_cmplx_gpu) -! call hipCheck(hipDeviceSynchronize()) -! #endif -! #:endcall GPU_HOST_DATA - -! Nfq = min(floor(2_dp*real(i, dp)*pi), cmplx_size) -! $:GPU_UPDATE(device='[Nfq]') - -! $:GPU_PARALLEL_LOOP(collapse=3) -! do k = 1, sys_size -! do j = 0, m -! do l = 1, Nfq -! data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = data_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) -! end do -! end do -! end do -! $:END_GPU_PARALLEL_LOOP() - -! #:call GPU_HOST_DATA(use_device_addr='[data_real_gpu, data_cmplx_gpu, data_fltr_cmplx_gpu]') -! #if defined(__PGI) -! ierr = cufftExecZ2D(bwd_plan_gpu, data_fltr_cmplx_gpu, data_real_gpu) -! #else -! ierr = hipfftExecZ2D(bwd_plan_gpu, data_fltr_cmplx_gpu, data_real_gpu) -! call hipCheck(hipDeviceSynchronize()) -! #endif -! #:endcall GPU_HOST_DATA - -! $:GPU_PARALLEL_LOOP(collapse=3, firstprivate='[i]') -! do k = 1, sys_size -! do j = 0, m -! do l = 0, p -! data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size)/real(real_size, dp) -! q_cons_vf(k)%sf(j, i, l) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) -! end do -! end do -! end do -! $:END_GPU_PARALLEL_LOOP() -! end do - -! #else -! Nfq = 3 -! do j = 0, m -! do k = 1, sys_size -! data_fltr_cmplx(:) = (0_dp, 0_dp) -! data_real(1:p + 1) = q_cons_vf(k)%sf(j, 0, 0:p) -! call fftw_execute_dft_r2c(fwd_plan, data_real, data_cmplx) -! data_fltr_cmplx(1:Nfq) = data_cmplx(1:Nfq) -! call fftw_execute_dft_c2r(bwd_plan, data_fltr_cmplx, data_real) -! data_real(:) = data_real(:)/real(real_size, dp) -! q_cons_vf(k)%sf(j, 0, 0:p) = data_real(1:p + 1) -! end do -! end do - -! ! Apply Fourier filter to additional rings -! do i = 1, fourier_rings -! Nfq = min(floor(2_dp*real(i, dp)*pi), cmplx_size) -! do j = 0, m -! do k = 1, sys_size -! data_fltr_cmplx(:) = (0_dp, 0_dp) -! data_real(1:p + 1) = q_cons_vf(k)%sf(j, i, 0:p) -! call fftw_execute_dft_r2c(fwd_plan, data_real, data_cmplx) -! data_fltr_cmplx(1:Nfq) = data_cmplx(1:Nfq) -! call fftw_execute_dft_c2r(bwd_plan, data_fltr_cmplx, data_real) -! data_real(:) = data_real(:)/real(real_size, dp) -! q_cons_vf(k)%sf(j, i, 0:p) = data_real(1:p + 1) -! end do -! end do -! end do -! #endif + integer :: i, j, k, l !< Generic loop iterators + integer :: ierr !< Generic flag used to identify and report GPU errors + + ! Restrict filter to processors that have cells adjacent to axis + if (bc_y%beg >= 0) return +#if defined(MFC_GPU) + + $:GPU_PARALLEL_LOOP(collapse=3) + do k = 1, sys_size + do j = 0, m + do l = 1, cmplx_size + data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = (0_dp, 0_dp) + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + + $:GPU_PARALLEL_LOOP(collapse=3) + do k = 1, sys_size + do j = 0, m + do l = 0, p + data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = q_cons_vf(k)%sf(j, 0, l) + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + + #:call GPU_HOST_DATA(use_device_addr='[data_real_gpu, data_cmplx_gpu, data_fltr_cmplx_gpu]') +#if defined(__PGI) + ierr = cufftExecD2Z(fwd_plan_gpu, data_real_gpu, data_cmplx_gpu) +#else + ierr = hipfftExecD2Z(fwd_plan_gpu, data_real_gpu, data_cmplx_gpu) + call hipCheck(hipDeviceSynchronize()) +#endif + #:endcall GPU_HOST_DATA + Nfq = 3 + $:GPU_UPDATE(device='[Nfq]') + + $:GPU_PARALLEL_LOOP(collapse=3) + do k = 1, sys_size + do j = 0, m + do l = 1, Nfq + data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = data_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + + #:call GPU_HOST_DATA(use_device_addr='[data_real_gpu, data_cmplx_gpu, data_fltr_cmplx_gpu]') +#if defined(__PGI) + ierr = cufftExecZ2D(bwd_plan_gpu, data_fltr_cmplx_gpu, data_real_gpu) +#else + ierr = hipfftExecZ2D(bwd_plan_gpu, data_fltr_cmplx_gpu, data_real_gpu) + call hipCheck(hipDeviceSynchronize()) +#endif + #:endcall GPU_HOST_DATA + + $:GPU_PARALLEL_LOOP(collapse=3) + do k = 1, sys_size + do j = 0, m + do l = 0, p + data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size)/real(real_size, dp) + q_cons_vf(k)%sf(j, 0, l) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + + do i = 1, fourier_rings + + $:GPU_PARALLEL_LOOP(collapse=3) + do k = 1, sys_size + do j = 0, m + do l = 1, cmplx_size + data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = (0_dp, 0_dp) + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + + $:GPU_PARALLEL_LOOP(collapse=3, firstprivate='[i]') + do k = 1, sys_size + do j = 0, m + do l = 0, p + data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = q_cons_vf(k)%sf(j, i, l) + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + + #:call GPU_HOST_DATA(use_device_addr='[data_real_gpu, data_cmplx_gpu, data_fltr_cmplx_gpu]') +#if defined(__PGI) + ierr = cufftExecD2Z(fwd_plan_gpu, data_real_gpu, data_cmplx_gpu) +#else + ierr = hipfftExecD2Z(fwd_plan_gpu, data_real_gpu, data_cmplx_gpu) + call hipCheck(hipDeviceSynchronize()) +#endif + #:endcall GPU_HOST_DATA + + Nfq = min(floor(2_dp*real(i, dp)*pi), cmplx_size) + $:GPU_UPDATE(device='[Nfq]') + + $:GPU_PARALLEL_LOOP(collapse=3) + do k = 1, sys_size + do j = 0, m + do l = 1, Nfq + data_fltr_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) = data_cmplx_gpu(l + j*cmplx_size + (k - 1)*cmplx_size*x_size) + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + + #:call GPU_HOST_DATA(use_device_addr='[data_real_gpu, data_cmplx_gpu, data_fltr_cmplx_gpu]') +#if defined(__PGI) + ierr = cufftExecZ2D(bwd_plan_gpu, data_fltr_cmplx_gpu, data_real_gpu) +#else + ierr = hipfftExecZ2D(bwd_plan_gpu, data_fltr_cmplx_gpu, data_real_gpu) + call hipCheck(hipDeviceSynchronize()) +#endif + #:endcall GPU_HOST_DATA + + $:GPU_PARALLEL_LOOP(collapse=3, firstprivate='[i]') + do k = 1, sys_size + do j = 0, m + do l = 0, p + data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size)/real(real_size, dp) + q_cons_vf(k)%sf(j, i, l) = data_real_gpu(l + j*real_size + 1 + (k - 1)*real_size*x_size) + end do + end do + end do + $:END_GPU_PARALLEL_LOOP() + end do + +#else + Nfq = 3 + do j = 0, m + do k = 1, sys_size + data_fltr_cmplx(:) = (0_dp, 0_dp) + data_real(1:p + 1) = q_cons_vf(k)%sf(j, 0, 0:p) + call fftw_execute_dft_r2c(fwd_plan, data_real, data_cmplx) + data_fltr_cmplx(1:Nfq) = data_cmplx(1:Nfq) + call fftw_execute_dft_c2r(bwd_plan, data_fltr_cmplx, data_real) + data_real(:) = data_real(:)/real(real_size, dp) + q_cons_vf(k)%sf(j, 0, 0:p) = data_real(1:p + 1) + end do + end do + + ! Apply Fourier filter to additional rings + do i = 1, fourier_rings + Nfq = min(floor(2_dp*real(i, dp)*pi), cmplx_size) + do j = 0, m + do k = 1, sys_size + data_fltr_cmplx(:) = (0_dp, 0_dp) + data_real(1:p + 1) = q_cons_vf(k)%sf(j, i, 0:p) + call fftw_execute_dft_r2c(fwd_plan, data_real, data_cmplx) + data_fltr_cmplx(1:Nfq) = data_cmplx(1:Nfq) + call fftw_execute_dft_c2r(bwd_plan, data_fltr_cmplx, data_real) + data_real(:) = data_real(:)/real(real_size, dp) + q_cons_vf(k)%sf(j, i, 0:p) = data_real(1:p + 1) + end do + end do + end do +#endif end subroutine s_apply_fourier_filter @@ -301,25 +301,25 @@ contains !! applying the Fourier filter in the azimuthal direction. impure subroutine s_finalize_fftw_module -! #if defined(MFC_GPU) -! integer :: ierr !< Generic flag used to identify and report GPU errors -! @:DEALLOCATE(data_real_gpu, data_fltr_cmplx_gpu, data_cmplx_gpu) -! #if defined(__PGI) - -! ierr = cufftDestroy(fwd_plan_gpu) -! ierr = cufftDestroy(bwd_plan_gpu) -! #else -! ierr = hipfftDestroy(fwd_plan_gpu) -! ierr = hipfftDestroy(bwd_plan_gpu) -! #endif -! #else -! call fftw_free(fftw_real_data) -! call fftw_free(fftw_cmplx_data) -! call fftw_free(fftw_fltr_cmplx_data) - -! call fftw_destroy_plan(fwd_plan) -! call fftw_destroy_plan(bwd_plan) -! #endif +#if defined(MFC_GPU) + integer :: ierr !< Generic flag used to identify and report GPU errors + @:DEALLOCATE(data_real_gpu, data_fltr_cmplx_gpu, data_cmplx_gpu) +#if defined(__PGI) + + ierr = cufftDestroy(fwd_plan_gpu) + ierr = cufftDestroy(bwd_plan_gpu) +#else + ierr = hipfftDestroy(fwd_plan_gpu) + ierr = hipfftDestroy(bwd_plan_gpu) +#endif +#else + call fftw_free(fftw_real_data) + call fftw_free(fftw_cmplx_data) + call fftw_free(fftw_fltr_cmplx_data) + + call fftw_destroy_plan(fwd_plan) + call fftw_destroy_plan(bwd_plan) +#endif end subroutine s_finalize_fftw_module end module m_fftw From 224a4ca32b76f28f986edc7e748604b2936a0bc1 Mon Sep 17 00:00:00 2001 From: Anand Radhakrishnan Date: Tue, 27 Jan 2026 16:11:03 -0500 Subject: [PATCH 07/21] MPI and FFT fixed --- CMakeLists.txt | 11 ++++++----- load_amd.sh | 1 + 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 77f398be7e..082bff3e65 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -460,7 +460,8 @@ function(MFC_SETUP_TARGET) if (MFC_MPI AND ARGS_MPI) find_package(MPI COMPONENTS Fortran REQUIRED) - target_compile_definitions(${a_target} PRIVATE $ENV{CRAY_MPICH_INC}) + target_compile_definitions(${a_target} PRIVATE MFC_MPI) + target_compile_options(${a_target} PRIVATE "$ENV{CRAY_MPICH_INC}") target_link_libraries (${a_target} PRIVATE $ENV{CRAY_MPICH_LIB}) endif() @@ -481,10 +482,10 @@ function(MFC_SETUP_TARGET) target_link_libraries(${a_target} PRIVATE CUDA::cudart CUDA::cufft) else() #find_package(hipfort COMPONENTS hipfft CONFIG REQUIRED) - target_link_libraries(${a_target} PRIVATE $ENV{CRAY_HIPFORT_INC}) + target_link_libraries(${a_target} PRIVATE $ENV{CRAY_HIPFORT_LIB}) endif() else() - find_package(FFTW REQUIRED) + find_package(FFTW REQUIRED) target_link_libraries(${a_target} PRIVATE FFTW::FFTW) endif() endif() @@ -527,8 +528,8 @@ function(MFC_SETUP_TARGET) target_compile_options(${a_target} PRIVATE -fopenmp) target_link_options(${a_target} PRIVATE -fopenmp) elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "LLVMFlang") - target_compile_options(${a_target} PRIVATE -fopenmp --offload-arch=gfx90a $ENV{CRAY_MPICH_INC}) - target_link_options(${a_target} PRIVATE -fopenmp --offload-arch=gfx90a $ENV{CRAY_MPICH_LIB}) + target_compile_options(${a_target} PRIVATE -fopenmp --offload-arch=gfx90a) + target_link_options(${a_target} PRIVATE -fopenmp --offload-arch=gfx90a) endif() endif() diff --git a/load_amd.sh b/load_amd.sh index 7137c18fc4..4fffa3c536 100644 --- a/load_amd.sh +++ b/load_amd.sh @@ -11,6 +11,7 @@ export LD_LIBRARY_PATH=${OLCF_AFAR_ROOT}/lib:${OLCF_AFAR_ROOT}/lib/llvm/lib:${LD export CRAY_MPICH_INC="-I${OLCF_AFAR_ROOT}/include/mpich3.4a2" export CRAY_HIPFORT_INC="-I${OLCF_AFAR_ROOT}/include/hipfort/amdgcn" +export CRAY_HIPFORT_LIB="-L${OLCF_AFAR_ROOT}/lib -lhipfft" export CRAY_HIP_INC="-I${OLCF_AFAR_ROOT}/include/hip" export CRAY_MPICH_LIB="-L${CRAY_MPICH_PREFIX}/lib \ ${CRAY_PMI_POST_LINK_OPTS} \ From 3e80c6538bd6adf48b1c3bbed4387512e18c2d02 Mon Sep 17 00:00:00 2001 From: Anand Date: Wed, 28 Jan 2026 23:32:41 -0500 Subject: [PATCH 08/21] format --- src/common/m_boundary_common.fpp | 106 +- src/common/m_chemistry.fpp | 52 +- src/common/m_phase_change.fpp | 70 +- src/common/m_variables_conversion.fpp | 79 +- src/pre_process/m_global_parameters.fpp | 2 +- src/simulation/include/inline_riemann.fpp | 73 +- src/simulation/m_acoustic_src.fpp | 10 +- src/simulation/m_bubbles_EE.fpp | 14 +- src/simulation/m_bubbles_EL.fpp | 20 +- src/simulation/m_cbc.fpp | 51 +- src/simulation/m_compute_cbc.fpp | 294 +++-- src/simulation/m_data_output.fpp | 84 +- src/simulation/m_hyperelastic.fpp | 22 +- src/simulation/m_ibm.fpp | 36 +- src/simulation/m_igr.fpp | 1371 ++++++++++----------- src/simulation/m_pressure_relaxation.fpp | 20 +- src/simulation/m_qbmm.fpp | 74 +- src/simulation/m_rhs.fpp | 8 +- src/simulation/m_riemann_solvers.fpp | 508 ++++---- src/simulation/m_sim_helpers.fpp | 24 +- src/simulation/m_start_up.fpp | 4 +- src/simulation/m_surface_tension.fpp | 72 +- src/simulation/m_time_steppers.fpp | 38 +- src/simulation/m_viscous.fpp | 14 +- src/simulation/m_weno.fpp | 101 +- 25 files changed, 1568 insertions(+), 1579 deletions(-) diff --git a/src/common/m_boundary_common.fpp b/src/common/m_boundary_common.fpp index 8f5412f149..a0c4ed2455 100644 --- a/src/common/m_boundary_common.fpp +++ b/src/common/m_boundary_common.fpp @@ -54,7 +54,7 @@ contains impure subroutine s_initialize_boundary_common_module() - integer :: i, j + integer :: i, j @:ALLOCATE(bc_buffers(1:3, 1:2)) @@ -62,20 +62,20 @@ contains @:ALLOCATE(bc_buffers(1, 1)%sf(1:sys_size, 0:n, 0:p)) @:ALLOCATE(bc_buffers(1, 2)%sf(1:sys_size, 0:n, 0:p)) #:if not MFC_CASE_OPTIMIZATION or num_dims > 1 - if (n > 0) then - @:ALLOCATE(bc_buffers(2,1)%sf(-buff_size:m+buff_size,1:sys_size,0:p)) - @:ALLOCATE(bc_buffers(2,2)%sf(-buff_size:m+buff_size,1:sys_size,0:p)) - #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 - if (p > 0) then - @:ALLOCATE(bc_buffers(3,1)%sf(-buff_size:m+buff_size,-buff_size:n+buff_size,1:sys_size)) - @:ALLOCATE(bc_buffers(3,2)%sf(-buff_size:m+buff_size,-buff_size:n+buff_size,1:sys_size)) - end if - #:endif - end if + if (n > 0) then + @:ALLOCATE(bc_buffers(2,1)%sf(-buff_size:m+buff_size,1:sys_size,0:p)) + @:ALLOCATE(bc_buffers(2,2)%sf(-buff_size:m+buff_size,1:sys_size,0:p)) + #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 + if (p > 0) then + @:ALLOCATE(bc_buffers(3,1)%sf(-buff_size:m+buff_size,-buff_size:n+buff_size,1:sys_size)) + @:ALLOCATE(bc_buffers(3,2)%sf(-buff_size:m+buff_size,-buff_size:n+buff_size,1:sys_size)) + end if + #:endif + end if #:endif do i = 1, num_dims do j = 1, 2 - @:ACC_SETUP_SFs(bc_buffers(i,j)) + @:ACC_SETUP_SFs(bc_buffers(i,j)) end do end do @@ -160,7 +160,7 @@ contains if (n == 0) return #:if not MFC_CASE_OPTIMIZATION or num_dims > 1 - + if (bc_y%beg >= 0) then call s_mpi_sendrecv_variables_buffers(q_prim_vf, 2, -1, sys_size, pb_in, mv_in) else @@ -194,8 +194,6 @@ contains $:END_GPU_PARALLEL_LOOP() end if - - if (bc_y%end >= 0) then call s_mpi_sendrecv_variables_buffers(q_prim_vf, 2, 1, sys_size, pb_in, mv_in) else @@ -1063,41 +1061,41 @@ contains end do end if elseif (bc_dir == 2) then !< y-direction -#:if not MFC_CASE_OPTIMIZATION or num_dims > 1 - if (bc_loc == -1) then !< bc_y%beg - do i = 1, sys_size - do j = 1, buff_size - q_prim_vf(i)%sf(k, -j, l) = & - bc_buffers(2, 1)%sf(k, i, l) + #:if not MFC_CASE_OPTIMIZATION or num_dims > 1 + if (bc_loc == -1) then !< bc_y%beg + do i = 1, sys_size + do j = 1, buff_size + q_prim_vf(i)%sf(k, -j, l) = & + bc_buffers(2, 1)%sf(k, i, l) + end do end do - end do - else !< bc_y%end - do i = 1, sys_size - do j = 1, buff_size - q_prim_vf(i)%sf(k, n + j, l) = & - bc_buffers(2, 2)%sf(k, i, l) + else !< bc_y%end + do i = 1, sys_size + do j = 1, buff_size + q_prim_vf(i)%sf(k, n + j, l) = & + bc_buffers(2, 2)%sf(k, i, l) + end do end do - end do - end if -#:endif + end if + #:endif elseif (bc_dir == 3) then !< z-direction -#:if not MFC_CASE_OPTIMIZATION or num_dims > 2 - if (bc_loc == -1) then !< bc_z%beg - do i = 1, sys_size - do j = 1, buff_size - q_prim_vf(i)%sf(k, l, -j) = & - bc_buffers(3, 1)%sf(k, l, i) + #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 + if (bc_loc == -1) then !< bc_z%beg + do i = 1, sys_size + do j = 1, buff_size + q_prim_vf(i)%sf(k, l, -j) = & + bc_buffers(3, 1)%sf(k, l, i) + end do end do - end do - else !< bc_z%end - do i = 1, sys_size - do j = 1, buff_size - q_prim_vf(i)%sf(k, l, p + j) = & - bc_buffers(3, 2)%sf(k, l, i) + else !< bc_z%end + do i = 1, sys_size + do j = 1, buff_size + q_prim_vf(i)%sf(k, l, p + j) = & + bc_buffers(3, 2)%sf(k, l, i) + end do end do - end do - end if -#:endif + end if + #:endif end if #else call s_ghost_cell_extrapolation(q_prim_vf, bc_dir, bc_loc, k, l) @@ -2191,16 +2189,16 @@ contains deallocate (bc_buffers(1, 1)%sf) deallocate (bc_buffers(1, 2)%sf) #:if not MFC_CASE_OPTIMIZATION or num_dims > 1 - if (n > 0) then - deallocate (bc_buffers(2, 1)%sf) - deallocate (bc_buffers(2, 2)%sf) - #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 - if (p > 0) then - deallocate (bc_buffers(3, 1)%sf) - deallocate (bc_buffers(3, 2)%sf) + if (n > 0) then + deallocate (bc_buffers(2, 1)%sf) + deallocate (bc_buffers(2, 2)%sf) + #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 + if (p > 0) then + deallocate (bc_buffers(3, 1)%sf) + deallocate (bc_buffers(3, 2)%sf) + end if + #:endif end if - #:endif - end if #:endif end if diff --git a/src/common/m_chemistry.fpp b/src/common/m_chemistry.fpp index 4e89ea4a8f..69aba788db 100644 --- a/src/common/m_chemistry.fpp +++ b/src/common/m_chemistry.fpp @@ -126,13 +126,13 @@ contains integer :: eqn real(wp) :: T real(wp) :: rho, omega_m -#:if not MFC_CASE_OPTIMIZATION and USING_AMD - real(wp), dimension(10) :: Ys - real(wp), dimension(10) :: omega -#:else - real(wp), dimension(num_species) :: Ys - real(wp), dimension(num_species) :: omega -#:endif + #:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(10) :: Ys + real(wp), dimension(10) :: omega + #:else + real(wp), dimension(num_species) :: Ys + real(wp), dimension(num_species) :: omega + #:endif $:GPU_PARALLEL_LOOP(collapse=3, private='[Ys, omega, eqn, T, rho, omega_m]', copyin='[bounds]') do z = bounds(3)%beg, bounds(3)%end @@ -174,17 +174,17 @@ contains type(int_bounds_info), intent(in) :: irx, iry, irz integer, intent(in) :: idir -#:if not MFC_CASE_OPTIMIZATION and USING_AMD - real(wp), dimension(10) :: Xs_L, Xs_R, Xs_cell, Ys_L, Ys_R, Ys_cell - real(wp), dimension(10) :: mass_diffusivities_mixavg1, mass_diffusivities_mixavg2 - real(wp), dimension(10) :: mass_diffusivities_mixavg_Cell, dXk_dxi, h_l, h_r, h_k - real(wp), dimension(10) :: Mass_Diffu_Flux, dYk_dxi -#:else - real(wp), dimension(num_species) :: Xs_L, Xs_R, Xs_cell, Ys_L, Ys_R, Ys_cell - real(wp), dimension(num_species) :: mass_diffusivities_mixavg1, mass_diffusivities_mixavg2 - real(wp), dimension(num_species) :: mass_diffusivities_mixavg_Cell, dXk_dxi, h_l, h_r, h_k - real(wp), dimension(num_species) :: Mass_Diffu_Flux, dYk_dxi -#:endif + #:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(10) :: Xs_L, Xs_R, Xs_cell, Ys_L, Ys_R, Ys_cell + real(wp), dimension(10) :: mass_diffusivities_mixavg1, mass_diffusivities_mixavg2 + real(wp), dimension(10) :: mass_diffusivities_mixavg_Cell, dXk_dxi, h_l, h_r, h_k + real(wp), dimension(10) :: Mass_Diffu_Flux, dYk_dxi + #:else + real(wp), dimension(num_species) :: Xs_L, Xs_R, Xs_cell, Ys_L, Ys_R, Ys_cell + real(wp), dimension(num_species) :: mass_diffusivities_mixavg1, mass_diffusivities_mixavg2 + real(wp), dimension(num_species) :: mass_diffusivities_mixavg_Cell, dXk_dxi, h_l, h_r, h_k + real(wp), dimension(num_species) :: Mass_Diffu_Flux, dYk_dxi + #:endif real(wp) :: Mass_Diffu_Energy real(wp) :: MW_L, MW_R, MW_cell, Rgas_L, Rgas_R, T_L, T_R, P_L, P_R, rho_L, rho_R, rho_cell, rho_Vic @@ -268,11 +268,11 @@ contains $:GPU_LOOP(parallelism='[seq]') do i = chemxb, chemxe #:if USING_AMD - h_l(i - chemxb + 1) = h_l(i - chemxb + 1)*gas_constant*T_L/molecular_weights_nonparameter(i - chemxb + 1) - h_r(i - chemxb + 1) = h_r(i - chemxb + 1)*gas_constant*T_R/molecular_weights_nonparameter(i - chemxb + 1) + h_l(i - chemxb + 1) = h_l(i - chemxb + 1)*gas_constant*T_L/molecular_weights_nonparameter(i - chemxb + 1) + h_r(i - chemxb + 1) = h_r(i - chemxb + 1)*gas_constant*T_R/molecular_weights_nonparameter(i - chemxb + 1) #:else - h_l(i - chemxb + 1) = h_l(i - chemxb + 1)*gas_constant*T_L/molecular_weights(i - chemxb + 1) - h_r(i - chemxb + 1) = h_r(i - chemxb + 1)*gas_constant*T_R/molecular_weights(i - chemxb + 1) + h_l(i - chemxb + 1) = h_l(i - chemxb + 1)*gas_constant*T_L/molecular_weights(i - chemxb + 1) + h_r(i - chemxb + 1) = h_r(i - chemxb + 1)*gas_constant*T_R/molecular_weights(i - chemxb + 1) #:endif Xs_cell(i - chemxb + 1) = 0.5_wp*(Xs_L(i - chemxb + 1) + Xs_R(i - chemxb + 1)) h_k(i - chemxb + 1) = 0.5_wp*(h_l(i - chemxb + 1) + h_r(i - chemxb + 1)) @@ -295,11 +295,11 @@ contains $:GPU_LOOP(parallelism='[seq]') do eqn = chemxb, chemxe #:if USING_AMD - Mass_Diffu_Flux(eqn - chemxb + 1) = rho_cell*mass_diffusivities_mixavg_Cell(eqn - chemxb + 1)* & - molecular_weights_nonparameter(eqn - chemxb + 1)/MW_cell*dXk_dxi(eqn - chemxb + 1) + Mass_Diffu_Flux(eqn - chemxb + 1) = rho_cell*mass_diffusivities_mixavg_Cell(eqn - chemxb + 1)* & + molecular_weights_nonparameter(eqn - chemxb + 1)/MW_cell*dXk_dxi(eqn - chemxb + 1) #:else - Mass_Diffu_Flux(eqn - chemxb + 1) = rho_cell*mass_diffusivities_mixavg_Cell(eqn - chemxb + 1)* & - molecular_weights(eqn - chemxb + 1)/MW_cell*dXk_dxi(eqn - chemxb + 1) + Mass_Diffu_Flux(eqn - chemxb + 1) = rho_cell*mass_diffusivities_mixavg_Cell(eqn - chemxb + 1)* & + molecular_weights(eqn - chemxb + 1)/MW_cell*dXk_dxi(eqn - chemxb + 1) #:endif rho_Vic = rho_Vic + Mass_Diffu_Flux(eqn - chemxb + 1) Mass_Diffu_Energy = Mass_Diffu_Energy + h_k(eqn - chemxb + 1)*Mass_Diffu_Flux(eqn - chemxb + 1) diff --git a/src/common/m_phase_change.fpp b/src/common/m_phase_change.fpp index b1505a44eb..67c1c698f3 100644 --- a/src/common/m_phase_change.fpp +++ b/src/common/m_phase_change.fpp @@ -91,11 +91,11 @@ contains ! $:GPU_DECLARE(create='[pS,pSOV,pSSL,TS,TSOV,TSSL,TSatOV,TSatSL]') ! $:GPU_DECLARE(create='[rhoe,dynE,rhos,rho,rM,m1,m2,MCT,TvF]') -#:if not MFC_CASE_OPTIMIZATION and USING_AMD - real(wp), dimension(3) :: p_infOV, p_infpT, p_infSL, sk, hk, gk, ek, rhok -#:else - real(wp), dimension(num_fluids) :: p_infOV, p_infpT, p_infSL, sk, hk, gk, ek, rhok -#:endif + #:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(3) :: p_infOV, p_infpT, p_infSL, sk, hk, gk, ek, rhok + #:else + real(wp), dimension(num_fluids) :: p_infOV, p_infpT, p_infSL, sk, hk, gk, ek, rhok + #:endif ! $:GPU_DECLARE(create='[p_infOV,p_infpT,p_infSL,sk,hk,gk,ek,rhok]') !< Generic loop iterators @@ -238,23 +238,23 @@ contains do i = 1, num_fluids ! entropy sk(i) = cvs(i)*log((TS**gs_min(i)) & - /((pS + ps_inf(i))**(gs_min(i) - 1.0_wp))) + qvps(i) + /((pS + ps_inf(i))**(gs_min(i) - 1.0_wp))) + qvps(i) ! enthalpy hk(i) = gs_min(i)*cvs(i)*TS & - + qvs(i) + + qvs(i) ! Gibbs-free energy gk(i) = hk(i) - TS*sk(i) ! densities rhok(i) = (pS + ps_inf(i)) & - /((gs_min(i) - 1)*cvs(i)*TS) + /((gs_min(i) - 1)*cvs(i)*TS) ! internal energy ek(i) = (pS + gs_min(i) & - *ps_inf(i))/(pS + ps_inf(i)) & - *cvs(i)*TS + qvs(i) + *ps_inf(i))/(pS + ps_inf(i)) & + *cvs(i)*TS + qvs(i) end do ! calculating volume fractions, internal energies, and total entropy @@ -299,11 +299,11 @@ contains ! initializing variables integer, intent(in) :: j, k, l, MFL real(wp), intent(out) :: pS -#:if not MFC_CASE_OPTIMIZATION and USING_AMD - real(wp), dimension(3), intent(out) :: p_infpT -#:else - real(wp), dimension(num_fluids), intent(out) :: p_infpT -#:endif + #:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(3), intent(out) :: p_infpT + #:else + real(wp), dimension(num_fluids), intent(out) :: p_infpT + #:endif type(scalar_field), dimension(sys_size), intent(in) :: q_cons_vf real(wp), intent(in) :: rhoe real(wp), intent(out) :: TS @@ -315,7 +315,7 @@ contains ! auxiliary variables for the pT-equilibrium solver mCP = 0.0_wp; mQ = 0.0_wp; p_infpT_sum = 0._wp $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids + do i = 1, num_fluids p_infpT(i) = ps_inf(i) p_infpT_sum = p_infpT_sum + abs(p_infpT(i)) end do @@ -331,14 +331,14 @@ contains end do -#:if not MFC_CASE_OPTIMIZATION and USING_AMD - if(num_fluids < 3) then - $:GPU_LOOP(parallelism='[seq]') - do i = num_fluids+1, 3 - p_infpT(i) = p_infpT_sum - end do - end if -#:endif + #:if not MFC_CASE_OPTIMIZATION and USING_AMD + if (num_fluids < 3) then + $:GPU_LOOP(parallelism='[seq]') + do i = num_fluids + 1, 3 + p_infpT(i) = p_infpT_sum + end do + end if + #:endif ! Checking energy constraint if ((rhoe - mQ - minval(p_infpT)) < 0.0_wp) then @@ -417,19 +417,19 @@ contains integer, intent(in) :: j, k, l real(wp), intent(inout) :: pS -#:if not MFC_CASE_OPTIMIZATION and USING_AMD - real(wp), dimension(3), intent(in) :: p_infpT -#:else - real(wp), dimension(num_fluids), intent(in) :: p_infpT -#:endif + #:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(3), intent(in) :: p_infpT + #:else + real(wp), dimension(num_fluids), intent(in) :: p_infpT + #:endif real(wp), intent(in) :: rhoe type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf real(wp), intent(inout) :: TS -#:if not MFC_CASE_OPTIMIZATION and USING_AMD - real(wp), dimension(3) :: p_infpTg !< stiffness for the participating fluids for pTg-equilibrium -#:else - real(wp), dimension(num_fluids) :: p_infpTg !< stiffness for the participating fluids for pTg-equilibrium -#:endif + #:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(3) :: p_infpTg !< stiffness for the participating fluids for pTg-equilibrium + #:else + real(wp), dimension(num_fluids) :: p_infpTg !< stiffness for the participating fluids for pTg-equilibrium + #:endif real(wp), dimension(2, 2) :: Jac, InvJac, TJac !< matrices for the Newton Solver real(wp), dimension(2) :: R2D, DeltamP !< residual and correction array real(wp) :: Om ! underrelaxation factor @@ -582,7 +582,7 @@ contains TJac(2, 2) = Jac(2, 2) ! dividing by det(J) - InvJac = InvJac / (Jac(1, 1)*Jac(2, 2) - Jac(1, 2)*Jac(2, 1)) + InvJac = InvJac/(Jac(1, 1)*Jac(2, 2) - Jac(1, 2)*Jac(2, 1)) ! calculating correction array for Newton's method DeltamP = -1.0_wp*(matmul(InvJac, R2D)) diff --git a/src/common/m_variables_conversion.fpp b/src/common/m_variables_conversion.fpp index bc20350c91..dddb286836 100644 --- a/src/common/m_variables_conversion.fpp +++ b/src/common/m_variables_conversion.fpp @@ -128,7 +128,6 @@ contains real(wp) :: E_e real(wp) :: e_Per_Kg, Pdyn_Per_Kg real(wp) :: T_guess - integer :: s !< Generic loop iterator @@ -252,10 +251,9 @@ contains real(wp), intent(out), target :: qv real(wp), optional, dimension(2), intent(out) :: Re_K - real(wp), optional, intent(out) :: G_K + real(wp), optional, intent(out) :: G_K real(wp), dimension(num_fluids) :: alpha_rho_K, alpha_K !< real(wp), optional, dimension(num_fluids), intent(in) :: G - integer :: i, j !< Generic loop iterator @@ -323,17 +321,16 @@ contains & parallelism='[seq]', cray_inline=True) real(wp), intent(out) :: rho_K, gamma_K, pi_inf_K, qv_K -#:if not MFC_CASE_OPTIMIZATION and USING_AMD - real(wp), dimension(3), intent(inout) :: alpha_rho_K, alpha_K !< - real(wp), optional, dimension(3), intent(in) :: G -#:else - real(wp), dimension(num_fluids), intent(inout) :: alpha_rho_K, alpha_K !< - real(wp), optional, dimension(num_fluids), intent(in) :: G -#:endif + #:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(3), intent(inout) :: alpha_rho_K, alpha_K !< + real(wp), optional, dimension(3), intent(in) :: G + #:else + real(wp), dimension(num_fluids), intent(inout) :: alpha_rho_K, alpha_K !< + real(wp), optional, dimension(num_fluids), intent(in) :: G + #:endif real(wp), dimension(2), intent(out) :: Re_K real(wp), optional, intent(out) :: G_K real(wp) :: alpha_K_sum - integer :: i, j !< Generic loop iterators @@ -590,15 +587,15 @@ contains type(scalar_field), intent(inout) :: q_T_sf type(scalar_field), dimension(sys_size), intent(inout) :: qK_prim_vf type(int_bounds_info), dimension(1:3), intent(in) :: ibounds -#:if USING_AMD and not MFC_CASE_OPTIMIZATION - real(wp), dimension(3) :: alpha_K, alpha_rho_K - real(wp), dimension(3) :: nRtmp - real(wp) :: rhoYks(1:10) -#:else - real(wp), dimension(num_fluids) :: alpha_K, alpha_rho_K - real(wp), dimension(nb) :: nRtmp - real(wp) :: rhoYks(1:num_species) -#:endif + #:if USING_AMD and not MFC_CASE_OPTIMIZATION + real(wp), dimension(3) :: alpha_K, alpha_rho_K + real(wp), dimension(3) :: nRtmp + real(wp) :: rhoYks(1:10) + #:else + real(wp), dimension(num_fluids) :: alpha_K, alpha_rho_K + real(wp), dimension(nb) :: nRtmp + real(wp) :: rhoYks(1:num_species) + #:endif real(wp), dimension(2) :: Re_K real(wp) :: rho_K, gamma_K, pi_inf_K, qv_K, dyn_pres_K @@ -1190,17 +1187,17 @@ contains ! Partial densities, density, velocity, pressure, energy, advection ! variables, the specific heat ratio and liquid stiffness functions, ! the shear and volume Reynolds numbers and the Weber numbers -#:if not MFC_CASE_OPTIMIZATION and USING_AMD - real(wp), dimension(3) :: alpha_rho_K - real(wp), dimension(3) :: alpha_K - real(wp), dimension(3) :: vel_K - real(wp), dimension(10) :: Y_K -#:else - real(wp), dimension(num_fluids) :: alpha_rho_K - real(wp), dimension(num_fluids) :: alpha_K - real(wp), dimension(num_vels) :: vel_K - real(wp), dimension(num_species) :: Y_K -#:endif + #:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(3) :: alpha_rho_K + real(wp), dimension(3) :: alpha_K + real(wp), dimension(3) :: vel_K + real(wp), dimension(10) :: Y_K + #:else + real(wp), dimension(num_fluids) :: alpha_rho_K + real(wp), dimension(num_fluids) :: alpha_K + real(wp), dimension(num_vels) :: vel_K + real(wp), dimension(num_species) :: Y_K + #:endif real(wp) :: rho_K real(wp) :: vel_K_sum real(wp) :: pres_K @@ -1337,11 +1334,11 @@ contains & parallelism='[seq]', cray_inline=True) type(scalar_field), dimension(sys_size), intent(in) :: q_vf integer, intent(in) :: k, l, r -#:if not MFC_CASE_OPTIMIZATION and USING_AMD - real(wp), dimension(3), intent(out) :: alpha_rho_K, alpha_K -#:else - real(wp), dimension(num_fluids), intent(out) :: alpha_rho_K, alpha_K -#:endif + #:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(3), intent(out) :: alpha_rho_K, alpha_K + #:else + real(wp), dimension(num_fluids), intent(out) :: alpha_rho_K, alpha_K + #:endif integer :: i real(wp) :: alpha_K_sum @@ -1411,11 +1408,11 @@ contains real(wp), intent(in) :: pres real(wp), intent(in) :: rho, gamma, pi_inf, qv real(wp), intent(in) :: H -#:if not MFC_CASE_OPTIMIZATION and USING_AMD - real(wp), dimension(3), intent(in) :: adv -#:else - real(wp), dimension(num_fluids), intent(in) :: adv -#:endif + #:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(3), intent(in) :: adv + #:else + real(wp), dimension(num_fluids), intent(in) :: adv + #:endif real(wp), intent(in) :: vel_sum real(wp), intent(in) :: c_c real(wp), intent(out) :: c diff --git a/src/pre_process/m_global_parameters.fpp b/src/pre_process/m_global_parameters.fpp index 8380f5bb02..01bd182e2d 100644 --- a/src/pre_process/m_global_parameters.fpp +++ b/src/pre_process/m_global_parameters.fpp @@ -300,7 +300,7 @@ module m_global_parameters !! to the next time-step. logical :: fft_wrt - logical :: dummy + logical :: dummy contains diff --git a/src/simulation/include/inline_riemann.fpp b/src/simulation/include/inline_riemann.fpp index 5b4598c0aa..d8d0cc87c7 100644 --- a/src/simulation/include/inline_riemann.fpp +++ b/src/simulation/include/inline_riemann.fpp @@ -40,50 +40,49 @@ eps = 0.001_wp call get_species_enthalpies_rt(T_L, h_iL) call get_species_enthalpies_rt(T_R, h_iR) -#:if USING_AMD - h_iL = h_iL*gas_constant/molecular_weights_nonparameter*T_L - h_iR = h_iR*gas_constant/molecular_weights_nonparameter*T_R -#:else - h_iL = h_iL*gas_constant/molecular_weights*T_L - h_iR = h_iR*gas_constant/molecular_weights*T_R -#:endif + #:if USING_AMD + h_iL = h_iL*gas_constant/molecular_weights_nonparameter*T_L + h_iR = h_iR*gas_constant/molecular_weights_nonparameter*T_R + #:else + h_iL = h_iL*gas_constant/molecular_weights*T_L + h_iR = h_iR*gas_constant/molecular_weights*T_R + #:endif call get_species_specific_heats_r(T_L, Cp_iL) call get_species_specific_heats_r(T_R, Cp_iR) h_avg_2 = (sqrt(rho_L)*h_iL + sqrt(rho_R)*h_iR)/(sqrt(rho_L) + sqrt(rho_R)) Yi_avg = (sqrt(rho_L)*Ys_L + sqrt(rho_R)*Ys_R)/(sqrt(rho_L) + sqrt(rho_R)) T_avg = (sqrt(rho_L)*T_L + sqrt(rho_R)*T_R)/(sqrt(rho_L) + sqrt(rho_R)) -#:if USING_AMD - if (abs(T_L - T_R) < eps) then - ! Case when T_L and T_R are very close - Cp_avg = sum(Yi_avg(:)*(0.5_wp*Cp_iL(:) + 0.5_wp*Cp_iR(:))*gas_constant/molecular_weights_nonparameter(:)) - Cv_avg = sum(Yi_avg(:)*((0.5_wp*Cp_iL(:) + 0.5_wp*Cp_iR(:))*gas_constant/molecular_weights_nonparameter(:) - gas_constant/molecular_weights_nonparameter(:))) - else - ! Normal calculation when T_L and T_R are sufficiently different - Cp_avg = sum(Yi_avg(:)*(h_iR(:) - h_iL(:))/(T_R - T_L)) - Cv_avg = sum(Yi_avg(:)*((h_iR(:) - h_iL(:))/(T_R - T_L) - gas_constant/molecular_weights_nonparameter(:))) - end if - gamma_avg = Cp_avg/Cv_avg - - Phi_avg(:) = (gamma_avg - 1._wp)*(vel_avg_rms/2.0_wp - h_avg_2(:)) + gamma_avg*gas_constant/molecular_weights_nonparameter(:)*T_avg - c_sum_Yi_Phi = sum(Yi_avg(:)*Phi_avg(:)) -#:else - if (abs(T_L - T_R) < eps) then - ! Case when T_L and T_R are very close - Cp_avg = sum(Yi_avg(:)*(0.5_wp*Cp_iL(:) + 0.5_wp*Cp_iR(:))*gas_constant/molecular_weights(:)) - Cv_avg = sum(Yi_avg(:)*((0.5_wp*Cp_iL(:) + 0.5_wp*Cp_iR(:))*gas_constant/molecular_weights(:) - gas_constant/molecular_weights(:))) - else - ! Normal calculation when T_L and T_R are sufficiently different - Cp_avg = sum(Yi_avg(:)*(h_iR(:) - h_iL(:))/(T_R - T_L)) - Cv_avg = sum(Yi_avg(:)*((h_iR(:) - h_iL(:))/(T_R - T_L) - gas_constant/molecular_weights(:))) - end if - gamma_avg = Cp_avg/Cv_avg - - Phi_avg(:) = (gamma_avg - 1._wp)*(vel_avg_rms/2.0_wp - h_avg_2(:)) + gamma_avg*gas_constant/molecular_weights(:)*T_avg - c_sum_Yi_Phi = sum(Yi_avg(:)*Phi_avg(:)) -#:endif + #:if USING_AMD + if (abs(T_L - T_R) < eps) then + ! Case when T_L and T_R are very close + Cp_avg = sum(Yi_avg(:)*(0.5_wp*Cp_iL(:) + 0.5_wp*Cp_iR(:))*gas_constant/molecular_weights_nonparameter(:)) + Cv_avg = sum(Yi_avg(:)*((0.5_wp*Cp_iL(:) + 0.5_wp*Cp_iR(:))*gas_constant/molecular_weights_nonparameter(:) - gas_constant/molecular_weights_nonparameter(:))) + else + ! Normal calculation when T_L and T_R are sufficiently different + Cp_avg = sum(Yi_avg(:)*(h_iR(:) - h_iL(:))/(T_R - T_L)) + Cv_avg = sum(Yi_avg(:)*((h_iR(:) - h_iL(:))/(T_R - T_L) - gas_constant/molecular_weights_nonparameter(:))) + end if + gamma_avg = Cp_avg/Cv_avg + + Phi_avg(:) = (gamma_avg - 1._wp)*(vel_avg_rms/2.0_wp - h_avg_2(:)) + gamma_avg*gas_constant/molecular_weights_nonparameter(:)*T_avg + c_sum_Yi_Phi = sum(Yi_avg(:)*Phi_avg(:)) + #:else + if (abs(T_L - T_R) < eps) then + ! Case when T_L and T_R are very close + Cp_avg = sum(Yi_avg(:)*(0.5_wp*Cp_iL(:) + 0.5_wp*Cp_iR(:))*gas_constant/molecular_weights(:)) + Cv_avg = sum(Yi_avg(:)*((0.5_wp*Cp_iL(:) + 0.5_wp*Cp_iR(:))*gas_constant/molecular_weights(:) - gas_constant/molecular_weights(:))) + else + ! Normal calculation when T_L and T_R are sufficiently different + Cp_avg = sum(Yi_avg(:)*(h_iR(:) - h_iL(:))/(T_R - T_L)) + Cv_avg = sum(Yi_avg(:)*((h_iR(:) - h_iL(:))/(T_R - T_L) - gas_constant/molecular_weights(:))) + end if + gamma_avg = Cp_avg/Cv_avg + + Phi_avg(:) = (gamma_avg - 1._wp)*(vel_avg_rms/2.0_wp - h_avg_2(:)) + gamma_avg*gas_constant/molecular_weights(:)*T_avg + c_sum_Yi_Phi = sum(Yi_avg(:)*Phi_avg(:)) + #:endif - end if #:enddef roe_avg diff --git a/src/simulation/m_acoustic_src.fpp b/src/simulation/m_acoustic_src.fpp index 458f8dd514..75a61104da 100644 --- a/src/simulation/m_acoustic_src.fpp +++ b/src/simulation/m_acoustic_src.fpp @@ -143,11 +143,11 @@ contains type(scalar_field), dimension(sys_size), intent(inout) :: rhs_vf integer, intent(in) :: t_step -#:if not MFC_CASE_OPTIMIZATION and USING_AMD - real(wp), dimension(3) :: myalpha, myalpha_rho -#:else - real(wp), dimension(num_fluids) :: myalpha, myalpha_rho -#:endif + #:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(3) :: myalpha, myalpha_rho + #:else + real(wp), dimension(num_fluids) :: myalpha, myalpha_rho + #:endif real(wp) :: myRho, B_tait real(wp) :: sim_time, c, small_gamma real(wp) :: frequency_local, gauss_sigma_time_local diff --git a/src/simulation/m_bubbles_EE.fpp b/src/simulation/m_bubbles_EE.fpp index 6fda2da3b1..dfc36f2a32 100644 --- a/src/simulation/m_bubbles_EE.fpp +++ b/src/simulation/m_bubbles_EE.fpp @@ -165,13 +165,13 @@ contains real(wp) :: rddot real(wp) :: pb_local, mv_local, vflux, pbdot real(wp) :: n_tait, B_tait -#:if not MFC_CASE_OPTIMIZATION and USING_AMD - real(wp), dimension(3) :: Rtmp, Vtmp - real(wp), dimension(3) :: myalpha, myalpha_rho -#:else - real(wp), dimension(nb) :: Rtmp, Vtmp - real(wp), dimension(num_fluids) :: myalpha, myalpha_rho -#:endif + #:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(3) :: Rtmp, Vtmp + real(wp), dimension(3) :: myalpha, myalpha_rho + #:else + real(wp), dimension(nb) :: Rtmp, Vtmp + real(wp), dimension(num_fluids) :: myalpha, myalpha_rho + #:endif real(wp) :: myR, myV, alf, myP, myRho, R2Vav, R3 real(wp) :: nbub !< Bubble number density real(wp) :: my_divu diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index 28e9c8c27b..b3917c3488 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -537,11 +537,11 @@ contains real(wp) :: myR, myV, myBeta_c, myBeta_t, myR0, myPbdot, myMvdot real(wp) :: myPinf, aux1, aux2, myCson, myRho real(wp) :: gamma, pi_inf, qv -#:if not MFC_CASE_OPTIMIZATION and USING_AMD - real(wp), dimension(3) :: myalpha_rho, myalpha -#:else - real(wp), dimension(num_fluids) :: myalpha_rho, myalpha -#:endif + #:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(3) :: myalpha_rho, myalpha + #:else + real(wp), dimension(num_fluids) :: myalpha_rho, myalpha + #:endif real(wp), dimension(2) :: Re integer, dimension(3) :: cell @@ -782,11 +782,11 @@ contains real(wp), intent(out) :: cson real(wp) :: E, H -#:if not MFC_CASE_OPTIMIZATION and USING_AMD - real(wp), dimension(3) :: vel -#:else - real(wp), dimension(num_dims) :: vel -#:endif + #:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(3) :: vel + #:else + real(wp), dimension(num_dims) :: vel + #:endif integer :: i $:GPU_LOOP(parallelism='[seq]') diff --git a/src/simulation/m_cbc.fpp b/src/simulation/m_cbc.fpp index bd0abfbd20..4dc7d936fb 100644 --- a/src/simulation/m_cbc.fpp +++ b/src/simulation/m_cbc.fpp @@ -650,28 +650,28 @@ contains real(wp) :: dpi_inf_dt real(wp) :: dqv_dt real(wp) :: dpres_ds -#:if USING_AMD - real(wp), dimension(12) :: L -#:else - real(wp), dimension(sys_size) :: L -#:endif -#:if not MFC_CASE_OPTIMIZATION and USING_AMD - real(wp), dimension(3) :: alpha_rho, dalpha_rho_ds, mf - real(wp), dimension(3) :: vel, dvel_ds - real(wp), dimension(3) :: adv_local, dadv_ds - real(wp), dimension(3) :: dadv_dt - real(wp), dimension(3) :: dvel_dt - real(wp), dimension(3) :: dalpha_rho_dt - real(wp), dimension(10) :: Ys, h_k, dYs_dt, dYs_ds, Xs, Gamma_i, Cp_i -#:else - real(wp), dimension(num_fluids) :: alpha_rho, dalpha_rho_ds, mf - real(wp), dimension(num_vels) :: vel, dvel_ds - real(wp), dimension(num_fluids) :: adv_local, dadv_ds - real(wp), dimension(num_fluids) :: dadv_dt - real(wp), dimension(num_dims) :: dvel_dt - real(wp), dimension(num_fluids) :: dalpha_rho_dt - real(wp), dimension(num_species) :: Ys, h_k, dYs_dt, dYs_ds, Xs, Gamma_i, Cp_i -#:endif + #:if USING_AMD + real(wp), dimension(12) :: L + #:else + real(wp), dimension(sys_size) :: L + #:endif + #:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(3) :: alpha_rho, dalpha_rho_ds, mf + real(wp), dimension(3) :: vel, dvel_ds + real(wp), dimension(3) :: adv_local, dadv_ds + real(wp), dimension(3) :: dadv_dt + real(wp), dimension(3) :: dvel_dt + real(wp), dimension(3) :: dalpha_rho_dt + real(wp), dimension(10) :: Ys, h_k, dYs_dt, dYs_ds, Xs, Gamma_i, Cp_i + #:else + real(wp), dimension(num_fluids) :: alpha_rho, dalpha_rho_ds, mf + real(wp), dimension(num_vels) :: vel, dvel_ds + real(wp), dimension(num_fluids) :: adv_local, dadv_ds + real(wp), dimension(num_fluids) :: dadv_dt + real(wp), dimension(num_dims) :: dvel_dt + real(wp), dimension(num_fluids) :: dalpha_rho_dt + real(wp), dimension(num_species) :: Ys, h_k, dYs_dt, dYs_ds, Xs, Gamma_i, Cp_i + #:endif real(wp), dimension(2) :: Re_cbc real(wp), dimension(3) :: lambda @@ -686,7 +686,6 @@ contains real(wp) :: Ma real(wp) :: T, sum_Enthalpies real(wp) :: Cv, Cp, e_mix, Mw, R_gas - real(wp) :: vel_K_sum, vel_dv_dt_sum @@ -744,8 +743,8 @@ contains $:END_GPU_PARALLEL_LOOP() end if - ! PI4 of flux_rs_vf and flux_src_rs_vf at j = 1/2, 3/2 - if(weno_order == 5 .or. dummy) then + ! PI4 of flux_rs_vf and flux_src_rs_vf at j = 1/2, 3/2 + if (weno_order == 5 .or. dummy) then call s_convert_primitive_to_flux_variables(q_prim_rs${XYZ}$_vf, & F_rs${XYZ}$_vf, & F_src_rs${XYZ}$_vf, & @@ -1070,7 +1069,7 @@ contains sum_Enthalpies = 0._wp $:GPU_LOOP(parallelism='[seq]') do i = 1, num_species - + #:if USING_AMD h_k(i) = h_k(i)*gas_constant/molecular_weights_nonparameter(i)*T sum_Enthalpies = sum_Enthalpies + (rho*h_k(i) - pres*Mw/molecular_weights_nonparameter(i)*Cp/R_gas)*dYs_dt(i) diff --git a/src/simulation/m_compute_cbc.fpp b/src/simulation/m_compute_cbc.fpp index aa0dd699f6..8c6a1d9fca 100644 --- a/src/simulation/m_compute_cbc.fpp +++ b/src/simulation/m_compute_cbc.fpp @@ -23,11 +23,11 @@ contains $:GPU_ROUTINE(parallelism='[seq]') real(wp), dimension(3), intent(in) :: lambda real(wp), intent(in) :: rho, c, dpres_ds -#:if not MFC_CASE_OPTIMIZATION and USING_AMD - real(wp), dimension(3), intent(in) :: dvel_ds -#:else - real(wp), dimension(num_dims), intent(in) :: dvel_ds -#:endif + #:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(3), intent(in) :: dvel_ds + #:else + real(wp), dimension(num_dims), intent(in) :: dvel_ds + #:endif real(wp) :: L1 L1 = lambda(1)*(dpres_ds - rho*c*dvel_ds(dir_idx(1))) end function f_base_L1 @@ -35,16 +35,16 @@ contains !> Fill density L variables subroutine s_fill_density_L(L, lambda_factor, lambda2, c, mf, dalpha_rho_ds, dpres_ds) $:GPU_ROUTINE(parallelism='[seq]') -#:if USING_AMD - real(wp), dimension(12), intent(inout) :: L -#:else - real(wp), dimension(sys_size), intent(inout) :: L -#:endif -#:if not MFC_CASE_OPTIMIZATION and USING_AMD - real(wp), dimension(3), intent(in) :: mf, dalpha_rho_ds -#:else - real(wp), dimension(num_fluids), intent(in) :: mf, dalpha_rho_ds -#:endif + #:if USING_AMD + real(wp), dimension(12), intent(inout) :: L + #:else + real(wp), dimension(sys_size), intent(inout) :: L + #:endif + #:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(3), intent(in) :: mf, dalpha_rho_ds + #:else + real(wp), dimension(num_fluids), intent(in) :: mf, dalpha_rho_ds + #:endif real(wp), intent(in) :: lambda_factor, lambda2, c real(wp), intent(in) :: dpres_ds integer :: i @@ -58,16 +58,16 @@ contains !> Fill velocity L variables subroutine s_fill_velocity_L(L, lambda_factor, lambda2, dvel_ds) $:GPU_ROUTINE(parallelism='[seq]') -#:if USING_AMD - real(wp), dimension(12), intent(inout) :: L -#:else - real(wp), dimension(sys_size), intent(inout) :: L -#:endif -#:if not MFC_CASE_OPTIMIZATION and USING_AMD - real(wp), dimension(3), intent(in) :: dvel_ds -#:else - real(wp), dimension(num_dims), intent(in) :: dvel_ds -#:endif + #:if USING_AMD + real(wp), dimension(12), intent(inout) :: L + #:else + real(wp), dimension(sys_size), intent(inout) :: L + #:endif + #:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(3), intent(in) :: dvel_ds + #:else + real(wp), dimension(num_dims), intent(in) :: dvel_ds + #:endif real(wp), intent(in) :: lambda_factor, lambda2 integer :: i @@ -80,16 +80,16 @@ contains !> Fill advection L variables subroutine s_fill_advection_L(L, lambda_factor, lambda2, dadv_ds) $:GPU_ROUTINE(parallelism='[seq]') -#:if USING_AMD - real(wp), dimension(12), intent(inout) :: L -#:else - real(wp), dimension(sys_size), intent(inout) :: L -#:endif -#:if not MFC_CASE_OPTIMIZATION and USING_AMD - real(wp), dimension(3), intent(in) :: dadv_ds -#:else - real(wp), dimension(num_fluids), intent(in) :: dadv_ds -#:endif + #:if USING_AMD + real(wp), dimension(12), intent(inout) :: L + #:else + real(wp), dimension(sys_size), intent(inout) :: L + #:endif + #:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(3), intent(in) :: dadv_ds + #:else + real(wp), dimension(num_fluids), intent(in) :: dadv_ds + #:endif real(wp), intent(in) :: lambda_factor, lambda2 integer :: i @@ -102,16 +102,16 @@ contains !> Fill chemistry L variables subroutine s_fill_chemistry_L(L, lambda_factor, lambda2, dYs_ds) $:GPU_ROUTINE(parallelism='[seq]') -#:if USING_AMD - real(wp), dimension(12), intent(inout) :: L -#:else - real(wp), dimension(sys_size), intent(inout) :: L -#:endif -#:if not MFC_CASE_OPTIMIZATION and USING_AMD - real(wp), dimension(10), intent(in) :: dYs_ds -#:else - real(wp), dimension(num_species), intent(in) :: dYs_ds -#:endif + #:if USING_AMD + real(wp), dimension(12), intent(inout) :: L + #:else + real(wp), dimension(sys_size), intent(inout) :: L + #:endif + #:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(10), intent(in) :: dYs_ds + #:else + real(wp), dimension(num_species), intent(in) :: dYs_ds + #:endif real(wp), intent(in) :: lambda_factor, lambda2 integer :: i @@ -129,16 +129,16 @@ contains & cray_inline=True) real(wp), dimension(3), intent(in) :: lambda -#:if USING_AMD - real(wp), dimension(12), intent(inout) :: L -#:else - real(wp), dimension(sys_size), intent(inout) :: L -#:endif -#:if not MFC_CASE_OPTIMIZATION and USING_AMD - real(wp), dimension(3), intent(in) :: dvel_ds -#:else - real(wp), dimension(num_dims), intent(in) :: dvel_ds -#:endif + #:if USING_AMD + real(wp), dimension(12), intent(inout) :: L + #:else + real(wp), dimension(sys_size), intent(inout) :: L + #:endif + #:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(3), intent(in) :: dvel_ds + #:else + real(wp), dimension(num_dims), intent(in) :: dvel_ds + #:endif real(wp), intent(in) :: rho, c, dpres_ds integer :: i @@ -153,22 +153,22 @@ contains & parallelism='[seq]', cray_inline=True) real(wp), dimension(3), intent(in) :: lambda -#:if USING_AMD - real(wp), dimension(12), intent(inout) :: L -#:else - real(wp), dimension(sys_size), intent(inout) :: L -#:endif -#:if not MFC_CASE_OPTIMIZATION and USING_AMD - real(wp), dimension(3), intent(in) :: mf, dalpha_rho_ds - real(wp), dimension(3), intent(in) :: dvel_ds - real(wp), dimension(3), intent(in) :: dadv_ds - real(wp), dimension(10), intent(in) :: dYs_ds -#:else - real(wp), dimension(num_fluids), intent(in) :: mf, dalpha_rho_ds - real(wp), dimension(num_dims), intent(in) :: dvel_ds - real(wp), dimension(num_fluids), intent(in) :: dadv_ds - real(wp), dimension(num_species), intent(in) :: dYs_ds -#:endif + #:if USING_AMD + real(wp), dimension(12), intent(inout) :: L + #:else + real(wp), dimension(sys_size), intent(inout) :: L + #:endif + #:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(3), intent(in) :: mf, dalpha_rho_ds + real(wp), dimension(3), intent(in) :: dvel_ds + real(wp), dimension(3), intent(in) :: dadv_ds + real(wp), dimension(10), intent(in) :: dYs_ds + #:else + real(wp), dimension(num_fluids), intent(in) :: mf, dalpha_rho_ds + real(wp), dimension(num_dims), intent(in) :: dvel_ds + real(wp), dimension(num_fluids), intent(in) :: dadv_ds + real(wp), dimension(num_species), intent(in) :: dYs_ds + #:endif real(wp), intent(in) :: rho, c real(wp), intent(in) :: dpres_ds @@ -193,18 +193,17 @@ contains & parallelism='[seq]', cray_inline=True) real(wp), dimension(3), intent(in) :: lambda -#:if USING_AMD - real(wp), dimension(12), intent(inout) :: L -#:else - real(wp), dimension(sys_size), intent(inout) :: L -#:endif -#:if not MFC_CASE_OPTIMIZATION and USING_AMD - real(wp), dimension(3), intent(in) :: dvel_ds -#:else - real(wp), dimension(num_dims), intent(in) :: dvel_ds -#:endif + #:if USING_AMD + real(wp), dimension(12), intent(inout) :: L + #:else + real(wp), dimension(sys_size), intent(inout) :: L + #:endif + #:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(3), intent(in) :: dvel_ds + #:else + real(wp), dimension(num_dims), intent(in) :: dvel_ds + #:endif real(wp), intent(in) :: rho, c, dpres_ds - L(1) = f_base_L1(lambda, rho, c, dpres_ds, dvel_ds) L(2:advxe) = 0._wp @@ -217,22 +216,22 @@ contains & parallelism='[seq]', cray_inline=True) real(wp), dimension(3), intent(in) :: lambda -#:if USING_AMD - real(wp), dimension(12), intent(inout) :: L -#:else - real(wp), dimension(sys_size), intent(inout) :: L -#:endif -#:if not MFC_CASE_OPTIMIZATION and USING_AMD - real(wp), dimension(3), intent(in) :: mf, dalpha_rho_ds - real(wp), dimension(3), intent(in) :: dvel_ds - real(wp), dimension(3), intent(in) :: dadv_ds - real(wp), dimension(10), intent(in) :: dYs_ds -#:else - real(wp), dimension(num_fluids), intent(in) :: mf, dalpha_rho_ds - real(wp), dimension(num_dims), intent(in) :: dvel_ds - real(wp), dimension(num_fluids), intent(in) :: dadv_ds - real(wp), dimension(num_species), intent(in) :: dYs_ds -#:endif + #:if USING_AMD + real(wp), dimension(12), intent(inout) :: L + #:else + real(wp), dimension(sys_size), intent(inout) :: L + #:endif + #:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(3), intent(in) :: mf, dalpha_rho_ds + real(wp), dimension(3), intent(in) :: dvel_ds + real(wp), dimension(3), intent(in) :: dadv_ds + real(wp), dimension(10), intent(in) :: dYs_ds + #:else + real(wp), dimension(num_fluids), intent(in) :: mf, dalpha_rho_ds + real(wp), dimension(num_dims), intent(in) :: dvel_ds + real(wp), dimension(num_fluids), intent(in) :: dadv_ds + real(wp), dimension(num_species), intent(in) :: dYs_ds + #:endif real(wp), intent(in) :: rho, c real(wp), intent(in) :: dpres_ds @@ -250,20 +249,20 @@ contains & parallelism='[seq]', cray_inline=True) real(wp), dimension(3), intent(in) :: lambda -#:if USING_AMD - real(wp), dimension(12), intent(inout) :: L -#:else - real(wp), dimension(sys_size), intent(inout) :: L -#:endif -#:if not MFC_CASE_OPTIMIZATION and USING_AMD - real(wp), dimension(3), intent(in) :: mf, dalpha_rho_ds - real(wp), dimension(3), intent(in) :: dvel_ds - real(wp), dimension(3), intent(in) :: dadv_ds -#:else - real(wp), dimension(num_fluids), intent(in) :: mf, dalpha_rho_ds - real(wp), dimension(num_dims), intent(in) :: dvel_ds - real(wp), dimension(num_fluids), intent(in) :: dadv_ds -#:endif + #:if USING_AMD + real(wp), dimension(12), intent(inout) :: L + #:else + real(wp), dimension(sys_size), intent(inout) :: L + #:endif + #:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(3), intent(in) :: mf, dalpha_rho_ds + real(wp), dimension(3), intent(in) :: dvel_ds + real(wp), dimension(3), intent(in) :: dadv_ds + #:else + real(wp), dimension(num_fluids), intent(in) :: mf, dalpha_rho_ds + real(wp), dimension(num_dims), intent(in) :: dvel_ds + real(wp), dimension(num_fluids), intent(in) :: dadv_ds + #:endif real(wp), intent(in) :: rho, c real(wp), intent(in) :: dpres_ds @@ -280,24 +279,23 @@ contains & parallelism='[seq]', cray_inline=True) real(wp), dimension(3), intent(in) :: lambda -#:if USING_AMD - real(wp), dimension(12), intent(inout) :: L -#:else - real(wp), dimension(sys_size), intent(inout) :: L -#:endif -#:if not MFC_CASE_OPTIMIZATION and USING_AMD - real(wp), dimension(3), intent(in) :: mf, dalpha_rho_ds - real(wp), dimension(3), intent(in) :: dvel_ds - real(wp), dimension(3), intent(in) :: dadv_ds -#:else - real(wp), dimension(num_fluids), intent(in) :: mf, dalpha_rho_ds - real(wp), dimension(num_dims), intent(in) :: dvel_ds - real(wp), dimension(num_fluids), intent(in) :: dadv_ds -#:endif + #:if USING_AMD + real(wp), dimension(12), intent(inout) :: L + #:else + real(wp), dimension(sys_size), intent(inout) :: L + #:endif + #:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(3), intent(in) :: mf, dalpha_rho_ds + real(wp), dimension(3), intent(in) :: dvel_ds + real(wp), dimension(3), intent(in) :: dadv_ds + #:else + real(wp), dimension(num_fluids), intent(in) :: mf, dalpha_rho_ds + real(wp), dimension(num_dims), intent(in) :: dvel_ds + real(wp), dimension(num_fluids), intent(in) :: dadv_ds + #:endif real(wp), intent(in) :: rho, c real(wp), intent(in) :: dpres_ds - L(1) = f_base_L1(lambda, rho, c, dpres_ds, dvel_ds) call s_fill_density_L(L, 1._wp, lambda(2), c, mf, dalpha_rho_ds, dpres_ds) call s_fill_velocity_L(L, 1._wp, lambda(2), dvel_ds) @@ -309,11 +307,11 @@ contains subroutine s_compute_supersonic_inflow_L(L) $:GPU_ROUTINE(function_name='s_compute_supersonic_inflow_L', & & parallelism='[seq]', cray_inline=True) -#:if USING_AMD - real(wp), dimension(12), intent(inout) :: L -#:else - real(wp), dimension(sys_size), intent(inout) :: L -#:endif + #:if USING_AMD + real(wp), dimension(12), intent(inout) :: L + #:else + real(wp), dimension(sys_size), intent(inout) :: L + #:endif L(1:advxe) = 0._wp if (chemistry) L(chemxb:chemxe) = 0._wp end subroutine s_compute_supersonic_inflow_L @@ -324,22 +322,22 @@ contains & parallelism='[seq]', cray_inline=True) real(wp), dimension(3), intent(in) :: lambda -#:if USING_AMD - real(wp), dimension(12), intent(inout) :: L -#:else - real(wp), dimension(sys_size), intent(inout) :: L -#:endif -#:if not MFC_CASE_OPTIMIZATION and USING_AMD - real(wp), dimension(3), intent(in) :: mf, dalpha_rho_ds - real(wp), dimension(3), intent(in) :: dvel_ds - real(wp), dimension(3), intent(in) :: dadv_ds - real(wp), dimension(10), intent(in) :: dYs_ds -#:else - real(wp), dimension(num_fluids), intent(in) :: mf, dalpha_rho_ds - real(wp), dimension(num_dims), intent(in) :: dvel_ds - real(wp), dimension(num_fluids), intent(in) :: dadv_ds - real(wp), dimension(num_species), intent(in) :: dYs_ds -#:endif + #:if USING_AMD + real(wp), dimension(12), intent(inout) :: L + #:else + real(wp), dimension(sys_size), intent(inout) :: L + #:endif + #:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(3), intent(in) :: mf, dalpha_rho_ds + real(wp), dimension(3), intent(in) :: dvel_ds + real(wp), dimension(3), intent(in) :: dadv_ds + real(wp), dimension(10), intent(in) :: dYs_ds + #:else + real(wp), dimension(num_fluids), intent(in) :: mf, dalpha_rho_ds + real(wp), dimension(num_dims), intent(in) :: dvel_ds + real(wp), dimension(num_fluids), intent(in) :: dadv_ds + real(wp), dimension(num_species), intent(in) :: dYs_ds + #:endif real(wp), intent(in) :: rho, c real(wp), intent(in) :: dpres_ds diff --git a/src/simulation/m_data_output.fpp b/src/simulation/m_data_output.fpp index 97c1ce5a4d..6e97af027c 100644 --- a/src/simulation/m_data_output.fpp +++ b/src/simulation/m_data_output.fpp @@ -268,13 +268,13 @@ contains integer, intent(in) :: t_step real(wp) :: rho !< Cell-avg. density -#:if not MFC_CASE_OPTIMIZATION and USING_AMD - real(wp), dimension(3) :: alpha !< Cell-avg. volume fraction - real(wp), dimension(3) :: vel !< Cell-avg. velocity -#:else - real(wp), dimension(num_fluids) :: alpha !< Cell-avg. volume fraction - real(wp), dimension(num_vels) :: vel !< Cell-avg. velocity -#:endif + #:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(3) :: alpha !< Cell-avg. volume fraction + real(wp), dimension(3) :: vel !< Cell-avg. velocity + #:else + real(wp), dimension(num_fluids) :: alpha !< Cell-avg. volume fraction + real(wp), dimension(num_vels) :: vel !< Cell-avg. velocity + #:endif real(wp) :: vel_sum !< Cell-avg. velocity sum real(wp) :: pres !< Cell-avg. pressure real(wp) :: gamma !< Cell-avg. sp. heat ratio @@ -1574,30 +1574,30 @@ contains elseif (p == 0) then if (bubbles_euler) then #:if not MFC_CASE_OPTIMIZATION or num_dims > 1 - write (i + 30, '(6X,10F24.8)') & - nondim_time, & - rho, & - vel(1), & - vel(2), & - pres, & - alf, & - nR(1), & - nRdot(1), & - R(1), & - Rdot(1) + write (i + 30, '(6X,10F24.8)') & + nondim_time, & + rho, & + vel(1), & + vel(2), & + pres, & + alf, & + nR(1), & + nRdot(1), & + R(1), & + Rdot(1) #:endif else if (elasticity) then #:if not MFC_CASE_OPTIMIZATION or num_dims > 1 - write (i + 30, '(6X,F12.6,F24.8,F24.8,F24.8,F24.8,'// & - 'F24.8,F24.8,F24.8)') & - nondim_time, & - rho, & - vel(1), & - vel(2), & - pres, & - tau_e(1), & - tau_e(2), & - tau_e(3) + write (i + 30, '(6X,F12.6,F24.8,F24.8,F24.8,F24.8,'// & + 'F24.8,F24.8,F24.8)') & + nondim_time, & + rho, & + vel(1), & + vel(2), & + pres, & + tau_e(1), & + tau_e(2), & + tau_e(3) #:endif else write (i + 30, '(6X,F12.6,F24.8,F24.8,F24.8)') & @@ -1609,20 +1609,20 @@ contains end if else #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 - write (i + 30, '(6X,F12.6,F24.8,F24.8,F24.8,F24.8,'// & - 'F24.8,F24.8,F24.8,F24.8,F24.8,'// & - 'F24.8)') & - nondim_time, & - rho, & - vel(1), & - vel(2), & - vel(3), & - pres, & - gamma, & - pi_inf, & - qv, & - c, & - accel + write (i + 30, '(6X,F12.6,F24.8,F24.8,F24.8,F24.8,'// & + 'F24.8,F24.8,F24.8,F24.8,F24.8,'// & + 'F24.8)') & + nondim_time, & + rho, & + vel(1), & + vel(2), & + vel(3), & + pres, & + gamma, & + pi_inf, & + qv, & + c, & + accel #:endif end if end if diff --git a/src/simulation/m_hyperelastic.fpp b/src/simulation/m_hyperelastic.fpp index 52914d9938..cc59b8ba90 100644 --- a/src/simulation/m_hyperelastic.fpp +++ b/src/simulation/m_hyperelastic.fpp @@ -98,17 +98,17 @@ contains type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf -#:if USING_AMD - real(wp), dimension(9) :: tensora, tensorb -#:else - real(wp), dimension(tensor_size) :: tensora, tensorb -#:endif - -#:if not MFC_CASE_OPTIMIZATION and USING_AMD - real(wp), dimension(3) :: alpha_k, alpha_rho_k -#:else - real(wp), dimension(num_fluids) :: alpha_k, alpha_rho_k -#:endif + #:if USING_AMD + real(wp), dimension(9) :: tensora, tensorb + #:else + real(wp), dimension(tensor_size) :: tensora, tensorb + #:endif + + #:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(3) :: alpha_k, alpha_rho_k + #:else + real(wp), dimension(num_fluids) :: alpha_k, alpha_rho_k + #:endif real(wp), dimension(2) :: Re real(wp) :: rho, gamma, pi_inf, qv real(wp) :: G_local diff --git a/src/simulation/m_ibm.fpp b/src/simulation/m_ibm.fpp index 1d4d712226..bc023f4ecc 100644 --- a/src/simulation/m_ibm.fpp +++ b/src/simulation/m_ibm.fpp @@ -179,19 +179,19 @@ contains real(wp) :: pres_IP real(wp), dimension(3) :: vel_IP, vel_norm_IP real(wp) :: c_IP -#:if not MFC_CASE_OPTIMIZATION and USING_AMD - real(wp), dimension(3) :: Gs - real(wp), dimension(3) :: alpha_rho_IP, alpha_IP - real(wp), dimension(3) :: r_IP, v_IP, pb_IP, mv_IP - real(wp), dimension(18) :: nmom_IP - real(wp), dimension(12) :: presb_IP, massv_IP -#:else - real(wp), dimension(num_fluids) :: Gs - real(wp), dimension(num_fluids) :: alpha_rho_IP, alpha_IP - real(wp), dimension(nb) :: r_IP, v_IP, pb_IP, mv_IP - real(wp), dimension(nb*nmom) :: nmom_IP - real(wp), dimension(nb*nnode) :: presb_IP, massv_IP -#:endif + #:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(3) :: Gs + real(wp), dimension(3) :: alpha_rho_IP, alpha_IP + real(wp), dimension(3) :: r_IP, v_IP, pb_IP, mv_IP + real(wp), dimension(18) :: nmom_IP + real(wp), dimension(12) :: presb_IP, massv_IP + #:else + real(wp), dimension(num_fluids) :: Gs + real(wp), dimension(num_fluids) :: alpha_rho_IP, alpha_IP + real(wp), dimension(nb) :: r_IP, v_IP, pb_IP, mv_IP + real(wp), dimension(nb*nmom) :: nmom_IP + real(wp), dimension(nb*nnode) :: presb_IP, massv_IP + #:endif !! Primitive variables at the image point associated with a ghost point, !! interpolated from surrounding fluid cells. @@ -862,11 +862,11 @@ contains real(wp), intent(INOUT) :: pres_IP real(wp), dimension(3), intent(INOUT) :: vel_IP real(wp), intent(INOUT) :: c_IP -#:if not MFC_CASE_OPTIMIZATION and USING_AMD - real(wp), dimension(3), intent(INOUT) :: alpha_IP, alpha_rho_IP -#:else - real(wp), dimension(num_fluids), intent(INOUT) :: alpha_IP, alpha_rho_IP -#:endif + #:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(3), intent(INOUT) :: alpha_IP, alpha_rho_IP + #:else + real(wp), dimension(num_fluids), intent(INOUT) :: alpha_IP, alpha_rho_IP + #:endif real(wp), optional, dimension(:), intent(INOUT) :: r_IP, v_IP, pb_IP, mv_IP real(wp), optional, dimension(:), intent(INOUT) :: nmom_IP real(wp), optional, dimension(:), intent(INOUT) :: presb_IP, massv_IP diff --git a/src/simulation/m_igr.fpp b/src/simulation/m_igr.fpp index 7e27a51de9..8aff221313 100644 --- a/src/simulation/m_igr.fpp +++ b/src/simulation/m_igr.fpp @@ -57,36 +57,36 @@ module m_igr #if defined(MFC_OpenMP) real(wp) :: coeff_L(-1:3) = [ & - -3._wp/60._wp, & ! Index -1 - 27._wp/60._wp, & ! Index 0 - 47._wp/60._wp, & ! Index 1 - -13._wp/60._wp, & ! Index 2 - 2._wp/60._wp & ! Index 3 - ] + -3._wp/60._wp, & ! Index -1 + 27._wp/60._wp, & ! Index 0 + 47._wp/60._wp, & ! Index 1 + -13._wp/60._wp, & ! Index 2 + 2._wp/60._wp & ! Index 3 + ] real(wp) :: coeff_R(-2:2) = [ & - 2._wp/60._wp, & ! Index -2 - -13._wp/60._wp, & ! Index -1 - 47._wp/60._wp, & ! Index 0 - 27._wp/60._wp, & ! Index 1 - -3._wp/60._wp & ! Index 2 - ] + 2._wp/60._wp, & ! Index -2 + -13._wp/60._wp, & ! Index -1 + 47._wp/60._wp, & ! Index 0 + 27._wp/60._wp, & ! Index 1 + -3._wp/60._wp & ! Index 2 + ] #else real(wp), parameter :: coeff_L(-1:3) = [ & - -3._wp/60._wp, & ! Index -1 - 27._wp/60._wp, & ! Index 0 - 47._wp/60._wp, & ! Index 1 - -13._wp/60._wp, & ! Index 2 - 2._wp/60._wp & ! Index 3 - ] + -3._wp/60._wp, & ! Index -1 + 27._wp/60._wp, & ! Index 0 + 47._wp/60._wp, & ! Index 1 + -13._wp/60._wp, & ! Index 2 + 2._wp/60._wp & ! Index 3 + ] real(wp), parameter :: coeff_R(-2:2) = [ & - 2._wp/60._wp, & ! Index -2 - -13._wp/60._wp, & ! Index -1 - 47._wp/60._wp, & ! Index 0 - 27._wp/60._wp, & ! Index 1 - -3._wp/60._wp & ! Index 2 - ] + 2._wp/60._wp, & ! Index -2 + -13._wp/60._wp, & ! Index -1 + 47._wp/60._wp, & ! Index 0 + 27._wp/60._wp, & ! Index 1 + -3._wp/60._wp & ! Index 2 + ] #endif #:elif igr_order == 3 integer, parameter :: vidxb = -1 @@ -94,26 +94,26 @@ module m_igr #if defined(MFC_OpenMP) real(wp) :: coeff_L(0:2) = [ & - 2._wp/6._wp, & ! Index 0 - 5._wp/6._wp, & ! Index 1 - -1._wp/6._wp & ! Index 2 - ] + 2._wp/6._wp, & ! Index 0 + 5._wp/6._wp, & ! Index 1 + -1._wp/6._wp & ! Index 2 + ] real(wp) :: coeff_R(-1:1) = [ & - -1._wp/6._wp, & ! Index -1 - 5._wp/6._wp, & ! Index 0 - 2._wp/6._wp & ! Index 1 - ] + -1._wp/6._wp, & ! Index -1 + 5._wp/6._wp, & ! Index 0 + 2._wp/6._wp & ! Index 1 + ] #else real(wp), parameter :: coeff_L(0:2) = [ & - 2._wp/6._wp, & ! Index 0 - 5._wp/6._wp, & ! Index 1 - -1._wp/6._wp & ! Index 2 - ] + 2._wp/6._wp, & ! Index 0 + 5._wp/6._wp, & ! Index 1 + -1._wp/6._wp & ! Index 2 + ] real(wp), parameter :: coeff_R(-1:1) = [ & - -1._wp/6._wp, & ! Index -1 - 5._wp/6._wp, & ! Index 0 - 2._wp/6._wp & ! Index 1 - ] + -1._wp/6._wp, & ! Index -1 + 5._wp/6._wp, & ! Index 0 + 2._wp/6._wp & ! Index 1 + ] #endif #:endif @@ -217,10 +217,10 @@ contains #:if not MFC_CASE_OPTIMIZATION if (igr_order == 3) then - vidxb = -1; vidxe = 2; + vidxb = -1; vidxe = 2; $:GPU_UPDATE(device='[vidxb, vidxe]') - @:ALLOCATE(coeff_L(0:2)) + @:ALLOCATE(coeff_L(0:2)) coeff_L(0) = (2._wp/6._wp) coeff_L(1) = (5._wp/6._wp) coeff_L(2) = (-1._wp/6._wp) @@ -230,9 +230,8 @@ contains coeff_R(0) = (5._wp/6._wp) coeff_R(-1) = (-1._wp/6._wp) - elseif (igr_order == 5) then - vidxb = -2; vidxe = 3; + vidxb = -2; vidxe = 3; $:GPU_UPDATE(device='[vidxb, vidxe]') @:ALLOCATE(coeff_L(-1:3)) @@ -482,420 +481,420 @@ contains if (idir == 1) then if (p == 0) then #:if not MFC_CASE_OPTIMIZATION or num_dims > 1 - $:GPU_PARALLEL_LOOP(collapse=3, private='[j,k,l,rho_L, rho_R, gamma_L, gamma_R, pi_inf_L, pi_inf_R, mu_L, mu_R, vel_L, vel_R, pres_L, pres_R, alpha_L, alpha_R, alpha_rho_L, alpha_rho_R, F_L, F_R, E_L, E_R, cfl, dvel, dvel_small, rho_sf_small, vflux_L_arr, vflux_R_arr]') - do l = 0, p - do k = 0, n - do j = -1, m + $:GPU_PARALLEL_LOOP(collapse=3, private='[j,k,l,rho_L, rho_R, gamma_L, gamma_R, pi_inf_L, pi_inf_R, mu_L, mu_R, vel_L, vel_R, pres_L, pres_R, alpha_L, alpha_R, alpha_rho_L, alpha_rho_R, F_L, F_R, E_L, E_R, cfl, dvel, dvel_small, rho_sf_small, vflux_L_arr, vflux_R_arr]') + do l = 0, p + do k = 0, n + do j = -1, m - vflux_L_arr = 0._wp - vflux_R_arr = 0._wp + vflux_L_arr = 0._wp + vflux_R_arr = 0._wp - #:if MFC_CASE_OPTIMIZATION - #:if igr_order == 5 - !DIR$ unroll 6 - #:elif igr_order == 3 - !DIR$ unroll 4 + #:if MFC_CASE_OPTIMIZATION + #:if igr_order == 5 + !DIR$ unroll 6 + #:elif igr_order == 3 + !DIR$ unroll 4 + #:endif #:endif - #:endif - $:GPU_LOOP(parallelism='[seq]') - do q = vidxb, vidxe - !x-direction contributions $:GPU_LOOP(parallelism='[seq]') - do i = -1, 1 - rho_L = 0._wp + do q = vidxb, vidxe + !x-direction contributions $:GPU_LOOP(parallelism='[seq]') - do r = 1, num_fluids - rho_L = rho_L + q_cons_vf(r)%sf(j + i + q, k, l) + do i = -1, 1 + rho_L = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do r = 1, num_fluids + rho_L = rho_L + q_cons_vf(r)%sf(j + i + q, k, l) + end do + rho_sf_small(i) = rho_L end do - rho_sf_small(i) = rho_L - end do - dvel_small(1) = (1/(2._wp*dx(j)))*( & - 1._wp*q_cons_vf(momxb)%sf(j + 1 + q, k, l)/rho_sf_small(1) - & - 1._wp*q_cons_vf(momxb)%sf(j - 1 + q, k, l)/rho_sf_small(-1)) - dvel_small(2) = (1/(2._wp*dx(j)))*( & - q_cons_vf(momxb + 1)%sf(j + 1 + q, k, l)/rho_sf_small(1) - & - q_cons_vf(momxb + 1)%sf(j - 1 + q, k, l)/rho_sf_small(-1)) + dvel_small(1) = (1/(2._wp*dx(j)))*( & + 1._wp*q_cons_vf(momxb)%sf(j + 1 + q, k, l)/rho_sf_small(1) - & + 1._wp*q_cons_vf(momxb)%sf(j - 1 + q, k, l)/rho_sf_small(-1)) + dvel_small(2) = (1/(2._wp*dx(j)))*( & + q_cons_vf(momxb + 1)%sf(j + 1 + q, k, l)/rho_sf_small(1) - & + q_cons_vf(momxb + 1)%sf(j - 1 + q, k, l)/rho_sf_small(-1)) + + if (q == 0) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + dvel(i, 1) = dvel_small(i) + end do + end if + + if (q > vidxb) then + vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q)*(dvel_small(2)) + vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(4._wp*dvel_small(1))/3._wp + end if + if (q < vidxe) then + vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q)*(dvel_small(2)) + vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(4._wp*dvel_small(1))/3._wp + end if - if (q == 0) then + !y-direction contributions $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - dvel(i, 1) = dvel_small(i) + do i = -1, 1 + rho_L = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do r = 1, num_fluids + rho_L = rho_L + q_cons_vf(r)%sf(j + q, k + i, l) + end do + rho_sf_small(i) = rho_L end do - end if - if (q > vidxb) then - vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q)*(dvel_small(2)) - vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(4._wp*dvel_small(1))/3._wp - end if - if (q < vidxe) then - vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q)*(dvel_small(2)) - vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(4._wp*dvel_small(1))/3._wp - end if + dvel_small(1) = (1/(2._wp*dy(k)))*( & + q_cons_vf(momxb)%sf(j + q, k + 1, l)/rho_sf_small(1) - & + q_cons_vf(momxb)%sf(j + q, k - 1, l)/rho_sf_small(-1)) + dvel_small(2) = (1/(2._wp*dy(k)))*( & + q_cons_vf(momxb + 1)%sf(j + q, k + 1, l)/rho_sf_small(1) - & + q_cons_vf(momxb + 1)%sf(j + q, k - 1, l)/rho_sf_small(-1)) + + if (q == 0) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + dvel(i, 2) = dvel_small(i) + end do + end if + + if (q > vidxb) then + vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q)*(dvel_small(1)) + vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(-2._wp*dvel_small(2))/3._wp + end if + if (q < vidxe) then + vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q)*(dvel_small(1)) + vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(-2._wp*dvel_small(2))/3._wp + end if + + if (q == 0) then + jac_rhs(j, k, l) = real(alf_igr*(2._wp*(dvel(1, 2)*dvel(2, 1)) & + + dvel(1, 1)**2._wp + dvel(2, 2)**2._wp & + + (dvel(1, 1) + dvel(2, 2))**2._wp), kind=stp) + end if + end do + + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_rho_L(i) = 0._wp + alpha_rho_R(i) = 0._wp + alpha_L(i) = 0._wp + alpha_R(i) = 0._wp + end do + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + vel_L(i) = 0._wp + vel_R(i) = 0._wp + end do - !y-direction contributions $:GPU_LOOP(parallelism='[seq]') - do i = -1, 1 - rho_L = 0._wp + do q = vidxb + 1, vidxe $:GPU_LOOP(parallelism='[seq]') - do r = 1, num_fluids - rho_L = rho_L + q_cons_vf(r)%sf(j + q, k + i, l) + do i = 1, num_fluids + alpha_rho_L(i) = alpha_rho_L(i) + coeff_L(q)*q_cons_vf(i)%sf(j + q, k, l) end do - rho_sf_small(i) = rho_L - end do - dvel_small(1) = (1/(2._wp*dy(k)))*( & - q_cons_vf(momxb)%sf(j + q, k + 1, l)/rho_sf_small(1) - & - q_cons_vf(momxb)%sf(j + q, k - 1, l)/rho_sf_small(-1)) - dvel_small(2) = (1/(2._wp*dy(k)))*( & - q_cons_vf(momxb + 1)%sf(j + q, k + 1, l)/rho_sf_small(1) - & - q_cons_vf(momxb + 1)%sf(j + q, k - 1, l)/rho_sf_small(-1)) + if (num_fluids > 1) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids - 1 + alpha_L(i) = alpha_L(i) + coeff_L(q)*q_cons_vf(E_idx + i)%sf(j + q, k, l) + end do + else + alpha_L(1) = 1._wp + end if - if (q == 0) then $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims - dvel(i, 2) = dvel_small(i) + vel_L(i) = vel_L(i) + coeff_L(q)*q_cons_vf(momxb + i - 1)%sf(j + q, k, l) end do - end if - - if (q > vidxb) then - vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q)*(dvel_small(1)) - vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(-2._wp*dvel_small(2))/3._wp - end if - if (q < vidxe) then - vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q)*(dvel_small(1)) - vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(-2._wp*dvel_small(2))/3._wp - end if + end do - if (q == 0) then - jac_rhs(j, k, l) = real(alf_igr*(2._wp*(dvel(1, 2)*dvel(2, 1)) & - + dvel(1, 1)**2._wp + dvel(2, 2)**2._wp & - + (dvel(1, 1) + dvel(2, 2))**2._wp), kind=stp) - end if - end do + $:GPU_LOOP(parallelism='[seq]') + do q = vidxb, vidxe - 1 + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_rho_R(i) = alpha_rho_R(i) + coeff_R(q)*q_cons_vf(i)%sf(j + q, k, l) + end do - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_L(i) = 0._wp - alpha_rho_R(i) = 0._wp - alpha_L(i) = 0._wp - alpha_R(i) = 0._wp - end do - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_L(i) = 0._wp - vel_R(i) = 0._wp - end do + if (num_fluids > 1) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids - 1 + alpha_R(i) = alpha_R(i) + coeff_R(q)*q_cons_vf(E_idx + i)%sf(j + q, k, l) + end do + else + alpha_R(1) = 1._wp + end if - $:GPU_LOOP(parallelism='[seq]') - do q = vidxb + 1, vidxe - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_L(i) = alpha_rho_L(i) + coeff_L(q)*q_cons_vf(i)%sf(j + q, k, l) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + vel_R(i) = vel_R(i) + coeff_R(q)*q_cons_vf(momxb + i - 1)%sf(j + q, k, l) + end do end do if (num_fluids > 1) then + + alpha_L(num_fluids) = 1._wp + alpha_R(num_fluids) = 1._wp + $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - 1 - alpha_L(i) = alpha_L(i) + coeff_L(q)*q_cons_vf(E_idx + i)%sf(j + q, k, l) + alpha_L(num_fluids) = alpha_L(num_fluids) - alpha_L(i) + alpha_R(num_fluids) = alpha_R(num_fluids) - alpha_R(i) end do - else - alpha_L(1) = 1._wp end if - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_L(i) = vel_L(i) + coeff_L(q)*q_cons_vf(momxb + i - 1)%sf(j + q, k, l) - end do - end do + rho_L = 0._wp; rho_R = 0._wp + gamma_L = 0._wp; gamma_R = 0._wp + pi_inf_L = 0._wp; pi_inf_R = 0._wp - $:GPU_LOOP(parallelism='[seq]') - do q = vidxb, vidxe - 1 $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - alpha_rho_R(i) = alpha_rho_R(i) + coeff_R(q)*q_cons_vf(i)%sf(j + q, k, l) - end do + rho_L = rho_L + alpha_rho_L(i) + gamma_L = gamma_L + alpha_L(i)*gammas(i) + pi_inf_L = pi_inf_L + alpha_L(i)*pi_infs(i) - if (num_fluids > 1) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - alpha_R(i) = alpha_R(i) + coeff_R(q)*q_cons_vf(E_idx + i)%sf(j + q, k, l) - end do - else - alpha_R(1) = 1._wp - end if + rho_R = rho_R + alpha_rho_R(i) + gamma_R = gamma_R + alpha_R(i)*gammas(i) + pi_inf_R = pi_inf_R + alpha_R(i)*pi_infs(i) + end do $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims - vel_R(i) = vel_R(i) + coeff_R(q)*q_cons_vf(momxb + i - 1)%sf(j + q, k, l) + vel_L(i) = vel_L(i)/rho_L + vel_R(i) = vel_R(i)/rho_R end do - end do - if (num_fluids > 1) then + if (viscous) then + mu_L = 0._wp; mu_R = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + mu_L = alpha_L(i)/Res_igr(1, i) + mu_L + mu_R = alpha_R(i)/Res_igr(1, i) + mu_R + end do - alpha_L(num_fluids) = 1._wp - alpha_R(num_fluids) = 1._wp + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j + 1, k, l) = rhs_vf(momxb + 1)%sf(j + 1, k, l) - & + real(0.5_wp*dt*mu_L*vflux_L_arr(1)*(1._wp/dx(j + 1)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) - & + real(0.5_wp*dt*mu_L*vflux_L_arr(1)*vel_L(2)*(1._wp/dx(j + 1)), kind=stp) - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - alpha_L(num_fluids) = alpha_L(num_fluids) - alpha_L(i) - alpha_R(num_fluids) = alpha_R(num_fluids) - alpha_R(i) - end do - end if + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & + real(0.5_wp*dt*mu_L*vflux_L_arr(1)*(1._wp/dx(j)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + real(0.5_wp*dt*mu_L*vflux_L_arr(1)*vel_L(2)*(1._wp/dx(j)), kind=stp) - rho_L = 0._wp; rho_R = 0._wp - gamma_L = 0._wp; gamma_R = 0._wp - pi_inf_L = 0._wp; pi_inf_R = 0._wp + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j + 1, k, l) = rhs_vf(momxb + 1)%sf(j + 1, k, l) - & + real(0.5_wp*dt*mu_R*vflux_R_arr(1)*(1._wp/dx(j + 1)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) - & + real(0.5_wp*dt*mu_R*vflux_R_arr(1)*vel_R(2)*(1._wp/dx(j + 1)), kind=stp) - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - rho_L = rho_L + alpha_rho_L(i) - gamma_L = gamma_L + alpha_L(i)*gammas(i) - pi_inf_L = pi_inf_L + alpha_L(i)*pi_infs(i) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & + real(0.5_wp*dt*mu_R*vflux_R_arr(1)*(1._wp/dx(j)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + real(0.5_wp*dt*mu_R*vflux_R_arr(1)*vel_R(2)*(1._wp/dx(j)), kind=stp) - rho_R = rho_R + alpha_rho_R(i) - gamma_R = gamma_R + alpha_R(i)*gammas(i) - pi_inf_R = pi_inf_R + alpha_R(i)*pi_infs(i) - end do + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j + 1, k, l) = rhs_vf(momxb)%sf(j + 1, k, l) - & + real(0.5_wp*dt*mu_L*vflux_L_arr(3)*(1._wp/dx(j + 1)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) - & + real(0.5_wp*dt*mu_L*vflux_L_arr(3)*vel_L(1)*(1._wp/dx(j + 1)), kind=stp) - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_L(i) = vel_L(i)/rho_L - vel_R(i) = vel_R(i)/rho_R - end do + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) + & + real(0.5_wp*dt*mu_L*vflux_L_arr(3)*(1._wp/dx(j)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + real(0.5_wp*dt*mu_L*vflux_L_arr(3)*vel_L(1)*(1._wp/dx(j)), kind=stp) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j + 1, k, l) = rhs_vf(momxb)%sf(j + 1, k, l) - & + real(0.5_wp*dt*mu_R*vflux_R_arr(3)*(1._wp/dx(j + 1)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) - & + real(0.5_wp*dt*mu_R*vflux_R_arr(3)*vel_R(1)*(1._wp/dx(j + 1)), kind=stp) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) + & + real(0.5_wp*dt*mu_R*vflux_R_arr(3)*(1._wp/dx(j)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + real(0.5_wp*dt*mu_R*vflux_R_arr(3)*vel_R(1)*(1._wp/dx(j)), kind=stp) + end if + + E_L = 0._wp; E_R = 0._wp + + $:GPU_LOOP(parallelism='[seq]') + do q = vidxb + 1, vidxe + E_L = E_L + coeff_L(q)*q_cons_vf(E_idx)%sf(j + q, k, l) + end do - if (viscous) then - mu_L = 0._wp; mu_R = 0._wp $:GPU_LOOP(parallelism='[seq]') + do q = vidxb, vidxe - 1 + E_R = E_R + coeff_R(q)*q_cons_vf(E_idx)%sf(j + q, k, l) + end do + + call s_get_derived_states(E_L, gamma_L, pi_inf_L, rho_L, vel_L, & + E_R, gamma_R, pi_inf_R, rho_R, vel_R, & + pres_L, pres_R, cfl) + do i = 1, num_fluids - mu_L = alpha_L(i)/Res_igr(1, i) + mu_L - mu_R = alpha_R(i)/Res_igr(1, i) + mu_R + $:GPU_ATOMIC(atomic='update') + rhs_vf(i)%sf(j + 1, k, l) = rhs_vf(i)%sf(j + 1, k, l) + & + real((0.5_wp*dt*(alpha_rho_L(i)* & + vel_L(1))*(1._wp/dx(j + 1)) - & + 0.5_wp*dt*cfl*(alpha_rho_L(i))*(1._wp/dx(j + 1))), kind=stp) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(i)%sf(j, k, l) = rhs_vf(i)%sf(j, k, l) - & + real((0.5_wp*dt*(alpha_rho_L(i)* & + vel_L(1))*(1._wp/dx(j)) - & + 0.5_wp*dt*cfl*(alpha_rho_L(i))*(1._wp/dx(j))), kind=stp) end do - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j + 1, k, l) = rhs_vf(momxb + 1)%sf(j + 1, k, l) - & - real(0.5_wp*dt*mu_L*vflux_L_arr(1)*(1._wp/dx(j + 1)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) - & - real(0.5_wp*dt*mu_L*vflux_L_arr(1)*vel_L(2)*(1._wp/dx(j + 1)), kind=stp) + if (num_fluids > 1) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids - 1 + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j + 1, k, l) = rhs_vf(advxb + i - 1)%sf(j + 1, k, l) + & + real((0.5_wp*dt*(alpha_L(i)* & + vel_L(1))*(1._wp/dx(j + 1)) - & + 0.5_wp*dt*cfl*(alpha_L(i))*(1._wp/dx(j + 1))), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & - real(0.5_wp*dt*mu_L*vflux_L_arr(1)*(1._wp/dx(j)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - real(0.5_wp*dt*mu_L*vflux_L_arr(1)*vel_L(2)*(1._wp/dx(j)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j + 1, k, l) = rhs_vf(advxb + i - 1)%sf(j + 1, k, l) & + - real((0.5_wp*dt*q_cons_vf(advxb + i - 1)%sf(j + 1, k, l)*vel_L(1)*(1._wp/dx(j + 1))), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j + 1, k, l) = rhs_vf(momxb + 1)%sf(j + 1, k, l) - & - real(0.5_wp*dt*mu_R*vflux_R_arr(1)*(1._wp/dx(j + 1)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) - & - real(0.5_wp*dt*mu_R*vflux_R_arr(1)*vel_R(2)*(1._wp/dx(j + 1)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) - & + real((0.5_wp*dt*(alpha_L(i)* & + vel_L(1))*(1._wp/dx(j)) - & + 0.5_wp*dt*cfl*(alpha_L(i))*(1._wp/dx(j))), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & - real(0.5_wp*dt*mu_R*vflux_R_arr(1)*(1._wp/dx(j)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - real(0.5_wp*dt*mu_R*vflux_R_arr(1)*vel_R(2)*(1._wp/dx(j)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) & + + real((0.5_wp*dt*q_cons_vf(advxb + i - 1)%sf(j, k, l)*vel_L(1)*(1._wp/dx(j))), kind=stp) + end do + end if $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j + 1, k, l) = rhs_vf(momxb)%sf(j + 1, k, l) - & - real(0.5_wp*dt*mu_L*vflux_L_arr(3)*(1._wp/dx(j + 1)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) - & - real(0.5_wp*dt*mu_L*vflux_L_arr(3)*vel_L(1)*(1._wp/dx(j + 1)), kind=stp) + rhs_vf(momxb)%sf(j + 1, k, l) = rhs_vf(momxb)%sf(j + 1, k, l) + & + real((0.5_wp*dt*(rho_L*(vel_L(1))**2.0 + & + pres_L)*(1._wp/dx(j + 1)) - & + 0.5_wp*dt*cfl*(rho_L*vel_L(1))*(1._wp/dx(j + 1))), kind=stp) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) + & - real(0.5_wp*dt*mu_L*vflux_L_arr(3)*(1._wp/dx(j)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - real(0.5_wp*dt*mu_L*vflux_L_arr(3)*vel_L(1)*(1._wp/dx(j)), kind=stp) + rhs_vf(momxb + 1)%sf(j + 1, k, l) = rhs_vf(momxb + 1)%sf(j + 1, k, l) + & + real((0.5_wp*dt*rho_L*vel_L(1)*vel_L(2)*(1._wp/dx(j + 1)) - & + 0.5_wp*dt*cfl*(rho_L*vel_L(2))*(1._wp/dx(j + 1))), kind=stp) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j + 1, k, l) = rhs_vf(momxb)%sf(j + 1, k, l) - & - real(0.5_wp*dt*mu_R*vflux_R_arr(3)*(1._wp/dx(j + 1)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) - & - real(0.5_wp*dt*mu_R*vflux_R_arr(3)*vel_R(1)*(1._wp/dx(j + 1)), kind=stp) + rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) + & + real((0.5_wp*dt*(vel_L(1)*(E_L + & + pres_L))*(1._wp/dx(j + 1)) - & + 0.5_wp*dt*cfl*(E_L)*(1._wp/dx(j + 1))), kind=stp) $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) + & - real(0.5_wp*dt*mu_R*vflux_R_arr(3)*(1._wp/dx(j)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - real(0.5_wp*dt*mu_R*vflux_R_arr(3)*vel_R(1)*(1._wp/dx(j)), kind=stp) - end if - - E_L = 0._wp; E_R = 0._wp - - $:GPU_LOOP(parallelism='[seq]') - do q = vidxb + 1, vidxe - E_L = E_L + coeff_L(q)*q_cons_vf(E_idx)%sf(j + q, k, l) - end do - - $:GPU_LOOP(parallelism='[seq]') - do q = vidxb, vidxe - 1 - E_R = E_R + coeff_R(q)*q_cons_vf(E_idx)%sf(j + q, k, l) - end do - - call s_get_derived_states(E_L, gamma_L, pi_inf_L, rho_L, vel_L, & - E_R, gamma_R, pi_inf_R, rho_R, vel_R, & - pres_L, pres_R, cfl) + rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) - & + real((0.5_wp*dt*(rho_L*(vel_L(1))**2.0 + & + pres_L)*(1._wp/dx(j)) - & + 0.5_wp*dt*cfl*(rho_L*vel_L(1))*(1._wp/dx(j))), kind=stp) - do i = 1, num_fluids $:GPU_ATOMIC(atomic='update') - rhs_vf(i)%sf(j + 1, k, l) = rhs_vf(i)%sf(j + 1, k, l) + & - real((0.5_wp*dt*(alpha_rho_L(i)* & - vel_L(1))*(1._wp/dx(j + 1)) - & - 0.5_wp*dt*cfl*(alpha_rho_L(i))*(1._wp/dx(j + 1))), kind=stp) + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) - & + real((0.5_wp*dt*rho_L*vel_L(1)*vel_L(2)*(1._wp/dx(j)) - & + 0.5_wp*dt*cfl*(rho_L*vel_L(2))*(1._wp/dx(j))), kind=stp) $:GPU_ATOMIC(atomic='update') - rhs_vf(i)%sf(j, k, l) = rhs_vf(i)%sf(j, k, l) - & - real((0.5_wp*dt*(alpha_rho_L(i)* & - vel_L(1))*(1._wp/dx(j)) - & - 0.5_wp*dt*cfl*(alpha_rho_L(i))*(1._wp/dx(j))), kind=stp) - end do + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & + real((0.5_wp*dt*(vel_L(1)*(E_L + & + pres_L))*(1._wp/dx(j)) - & + 0.5_wp*dt*cfl*(E_L)*(1._wp/dx(j))), kind=stp) - if (num_fluids > 1) then $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j + 1, k, l) = rhs_vf(advxb + i - 1)%sf(j + 1, k, l) + & - real((0.5_wp*dt*(alpha_L(i)* & - vel_L(1))*(1._wp/dx(j + 1)) - & - 0.5_wp*dt*cfl*(alpha_L(i))*(1._wp/dx(j + 1))), kind=stp) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j + 1, k, l) = rhs_vf(advxb + i - 1)%sf(j + 1, k, l) & - - real((0.5_wp*dt*q_cons_vf(advxb + i - 1)%sf(j + 1, k, l)*vel_L(1)*(1._wp/dx(j + 1))), kind=stp) - + do i = 1, num_fluids $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) - & - real((0.5_wp*dt*(alpha_L(i)* & - vel_L(1))*(1._wp/dx(j)) - & - 0.5_wp*dt*cfl*(alpha_L(i))*(1._wp/dx(j))), kind=stp) + rhs_vf(i)%sf(j + 1, k, l) = rhs_vf(i)%sf(j + 1, k, l) + & + real((0.5_wp*dt*(alpha_rho_R(i)* & + vel_R(1))*(1._wp/dx(j + 1)) + & + 0.5_wp*dt*cfl*(alpha_rho_R(i))*(1._wp/dx(j + 1))), kind=stp) $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) & - + real((0.5_wp*dt*q_cons_vf(advxb + i - 1)%sf(j, k, l)*vel_L(1)*(1._wp/dx(j))), kind=stp) + rhs_vf(i)%sf(j, k, l) = rhs_vf(i)%sf(j, k, l) - & + real((0.5_wp*dt*(alpha_rho_R(i)* & + vel_R(1))*(1._wp/dx(j)) + & + 0.5_wp*dt*cfl*(alpha_rho_R(i))*(1._wp/dx(j))), kind=stp) end do - end if - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j + 1, k, l) = rhs_vf(momxb)%sf(j + 1, k, l) + & - real((0.5_wp*dt*(rho_L*(vel_L(1))**2.0 + & - pres_L)*(1._wp/dx(j + 1)) - & - 0.5_wp*dt*cfl*(rho_L*vel_L(1))*(1._wp/dx(j + 1))), kind=stp) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j + 1, k, l) = rhs_vf(momxb + 1)%sf(j + 1, k, l) + & - real((0.5_wp*dt*rho_L*vel_L(1)*vel_L(2)*(1._wp/dx(j + 1)) - & - 0.5_wp*dt*cfl*(rho_L*vel_L(2))*(1._wp/dx(j + 1))), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) + & - real((0.5_wp*dt*(vel_L(1)*(E_L + & - pres_L))*(1._wp/dx(j + 1)) - & - 0.5_wp*dt*cfl*(E_L)*(1._wp/dx(j + 1))), kind=stp) + if (num_fluids > 1) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids - 1 + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j + 1, k, l) = rhs_vf(advxb + i - 1)%sf(j + 1, k, l) + & + real((0.5_wp*dt*(alpha_R(i)* & + vel_R(1))*(1._wp/dx(j + 1)) + & + 0.5_wp*dt*cfl*(alpha_R(i))*(1._wp/dx(j + 1))), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) - & - real((0.5_wp*dt*(rho_L*(vel_L(1))**2.0 + & - pres_L)*(1._wp/dx(j)) - & - 0.5_wp*dt*cfl*(rho_L*vel_L(1))*(1._wp/dx(j))), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j + 1, k, l) = rhs_vf(advxb + i - 1)%sf(j + 1, k, l) & + - real((0.5_wp*dt*q_cons_vf(advxb + i - 1)%sf(j + 1, k, l)*vel_R(1)*(1._wp/dx(j + 1))), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) - & - real((0.5_wp*dt*rho_L*vel_L(1)*vel_L(2)*(1._wp/dx(j)) - & - 0.5_wp*dt*cfl*(rho_L*vel_L(2))*(1._wp/dx(j))), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) - & + real((0.5_wp*dt*(alpha_R(i)* & + vel_R(1))*(1._wp/dx(j)) + & + 0.5_wp*dt*cfl*(alpha_R(i))*(1._wp/dx(j))), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & - real((0.5_wp*dt*(vel_L(1)*(E_L + & - pres_L))*(1._wp/dx(j)) - & - 0.5_wp*dt*cfl*(E_L)*(1._wp/dx(j))), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) & + + real((0.5_wp*dt*q_cons_vf(advxb + i - 1)%sf(j, k, l)*vel_R(1)*(1._wp/dx(j))), kind=stp) + end do + end if - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids $:GPU_ATOMIC(atomic='update') - rhs_vf(i)%sf(j + 1, k, l) = rhs_vf(i)%sf(j + 1, k, l) + & - real((0.5_wp*dt*(alpha_rho_R(i)* & - vel_R(1))*(1._wp/dx(j + 1)) + & - 0.5_wp*dt*cfl*(alpha_rho_R(i))*(1._wp/dx(j + 1))), kind=stp) + rhs_vf(momxb)%sf(j + 1, k, l) = rhs_vf(momxb)%sf(j + 1, k, l) + & + real((0.5_wp*dt*(rho_R*(vel_R(1))**2.0 + & + pres_R)*(1._wp/dx(j + 1)) + & + 0.5_wp*dt*cfl*(rho_R*vel_R(1))*(1._wp/dx(j + 1))), kind=stp) $:GPU_ATOMIC(atomic='update') - rhs_vf(i)%sf(j, k, l) = rhs_vf(i)%sf(j, k, l) - & - real((0.5_wp*dt*(alpha_rho_R(i)* & - vel_R(1))*(1._wp/dx(j)) + & - 0.5_wp*dt*cfl*(alpha_rho_R(i))*(1._wp/dx(j))), kind=stp) - end do - - if (num_fluids > 1) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j + 1, k, l) = rhs_vf(advxb + i - 1)%sf(j + 1, k, l) + & - real((0.5_wp*dt*(alpha_R(i)* & - vel_R(1))*(1._wp/dx(j + 1)) + & - 0.5_wp*dt*cfl*(alpha_R(i))*(1._wp/dx(j + 1))), kind=stp) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j + 1, k, l) = rhs_vf(advxb + i - 1)%sf(j + 1, k, l) & - - real((0.5_wp*dt*q_cons_vf(advxb + i - 1)%sf(j + 1, k, l)*vel_R(1)*(1._wp/dx(j + 1))), kind=stp) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) - & - real((0.5_wp*dt*(alpha_R(i)* & - vel_R(1))*(1._wp/dx(j)) + & - 0.5_wp*dt*cfl*(alpha_R(i))*(1._wp/dx(j))), kind=stp) + rhs_vf(momxb + 1)%sf(j + 1, k, l) = rhs_vf(momxb + 1)%sf(j + 1, k, l) + & + real((0.5_wp*dt*rho_R*vel_R(1)*vel_R(2)*(1._wp/dx(j + 1)) + & + 0.5_wp*dt*cfl*(rho_R*vel_R(2))*(1._wp/dx(j + 1))), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) & - + real((0.5_wp*dt*q_cons_vf(advxb + i - 1)%sf(j, k, l)*vel_R(1)*(1._wp/dx(j))), kind=stp) - end do - end if + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) + & + real((0.5_wp*dt*(vel_R(1)*(E_R + & + pres_R))*(1._wp/dx(j + 1)) + & + 0.5_wp*dt*cfl*(E_R)*(1._wp/dx(j + 1))), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j + 1, k, l) = rhs_vf(momxb)%sf(j + 1, k, l) + & + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) - & real((0.5_wp*dt*(rho_R*(vel_R(1))**2.0 + & - pres_R)*(1._wp/dx(j + 1)) + & - 0.5_wp*dt*cfl*(rho_R*vel_R(1))*(1._wp/dx(j + 1))), kind=stp) + pres_R)*(1._wp/dx(j)) + & + 0.5_wp*dt*cfl*(rho_R*vel_R(1))*(1._wp/dx(j))), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j + 1, k, l) = rhs_vf(momxb + 1)%sf(j + 1, k, l) + & - real((0.5_wp*dt*rho_R*vel_R(1)*vel_R(2)*(1._wp/dx(j + 1)) + & - 0.5_wp*dt*cfl*(rho_R*vel_R(2))*(1._wp/dx(j + 1))), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) - & + real((0.5_wp*dt*rho_R*vel_R(1)*vel_R(2)*(1._wp/dx(j)) + & + 0.5_wp*dt*cfl*(rho_R*vel_R(2))*(1._wp/dx(j))), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j + 1, k, l) = rhs_vf(E_idx)%sf(j + 1, k, l) + & + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & real((0.5_wp*dt*(vel_R(1)*(E_R + & - pres_R))*(1._wp/dx(j + 1)) + & - 0.5_wp*dt*cfl*(E_R)*(1._wp/dx(j + 1))), kind=stp) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) - & - real((0.5_wp*dt*(rho_R*(vel_R(1))**2.0 + & - pres_R)*(1._wp/dx(j)) + & - 0.5_wp*dt*cfl*(rho_R*vel_R(1))*(1._wp/dx(j))), kind=stp) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) - & - real((0.5_wp*dt*rho_R*vel_R(1)*vel_R(2)*(1._wp/dx(j)) + & - 0.5_wp*dt*cfl*(rho_R*vel_R(2))*(1._wp/dx(j))), kind=stp) - - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & - real((0.5_wp*dt*(vel_R(1)*(E_R + & - pres_R))*(1._wp/dx(j)) + & - 0.5_wp*dt*cfl*(E_R)*(1._wp/dx(j))), kind=stp) + pres_R))*(1._wp/dx(j)) + & + 0.5_wp*dt*cfl*(E_R)*(1._wp/dx(j))), kind=stp) + end do end do end do - end do - $:END_GPU_PARALLEL_LOOP() + $:END_GPU_PARALLEL_LOOP() #:endif else #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 @@ -1415,400 +1414,400 @@ contains else if (idir == 2) then if (p == 0) then #:if not MFC_CASE_OPTIMIZATION or num_dims > 1 - $:GPU_PARALLEL_LOOP(collapse=3, private='[j,k,l,rho_L, rho_R, gamma_L, gamma_R, pi_inf_L, pi_inf_R, mu_L, mu_R, vel_L, vel_R, pres_L, pres_R, alpha_L, alpha_R, alpha_rho_L, alpha_rho_R, F_L, F_R, E_L, E_R, cfl, dvel_small, rho_sf_small, vflux_L_arr, vflux_R_arr]') - do l = 0, p - do k = -1, n - do j = 0, m + $:GPU_PARALLEL_LOOP(collapse=3, private='[j,k,l,rho_L, rho_R, gamma_L, gamma_R, pi_inf_L, pi_inf_R, mu_L, mu_R, vel_L, vel_R, pres_L, pres_R, alpha_L, alpha_R, alpha_rho_L, alpha_rho_R, F_L, F_R, E_L, E_R, cfl, dvel_small, rho_sf_small, vflux_L_arr, vflux_R_arr]') + do l = 0, p + do k = -1, n + do j = 0, m - if (viscous) then - vflux_L_arr = 0._wp - vflux_R_arr = 0._wp + if (viscous) then + vflux_L_arr = 0._wp + vflux_R_arr = 0._wp - #:if MFC_CASE_OPTIMIZATION - #:if igr_order == 5 - !DIR$ unroll 6 - #:elif igr_order == 3 - !DIR$ unroll 4 - #:endif - #:endif - $:GPU_LOOP(parallelism='[seq]') - do q = vidxb, vidxe - - !x-direction contributions + #:if MFC_CASE_OPTIMIZATION + #:if igr_order == 5 + !DIR$ unroll 6 + #:elif igr_order == 3 + !DIR$ unroll 4 + #:endif + #:endif $:GPU_LOOP(parallelism='[seq]') - do i = -1, 1 - rho_L = 0._wp + do q = vidxb, vidxe + + !x-direction contributions $:GPU_LOOP(parallelism='[seq]') - do r = 1, num_fluids - rho_L = rho_L + q_cons_vf(r)%sf(j + i, k + q, l) + do i = -1, 1 + rho_L = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do r = 1, num_fluids + rho_L = rho_L + q_cons_vf(r)%sf(j + i, k + q, l) + end do + rho_sf_small(i) = rho_L end do - rho_sf_small(i) = rho_L - end do - dvel_small(1) = (1/(2._wp*dx(j)))*( & - q_cons_vf(momxb)%sf(j + 1, k + q, l)/rho_sf_small(1) - & - q_cons_vf(momxb)%sf(j - 1, k + q, l)/rho_sf_small(-1)) - dvel_small(2) = (1/(2._wp*dx(j)))*( & - q_cons_vf(momxb + 1)%sf(j + 1, k + q, l)/rho_sf_small(1) - & - q_cons_vf(momxb + 1)%sf(j - 1, k + q, l)/rho_sf_small(-1)) + dvel_small(1) = (1/(2._wp*dx(j)))*( & + q_cons_vf(momxb)%sf(j + 1, k + q, l)/rho_sf_small(1) - & + q_cons_vf(momxb)%sf(j - 1, k + q, l)/rho_sf_small(-1)) + dvel_small(2) = (1/(2._wp*dx(j)))*( & + q_cons_vf(momxb + 1)%sf(j + 1, k + q, l)/rho_sf_small(1) - & + q_cons_vf(momxb + 1)%sf(j - 1, k + q, l)/rho_sf_small(-1)) - if (q > vidxb) then - vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q)*(dvel_small(2)) - vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(-2._wp*dvel_small(1))/3._wp - end if - if (q < vidxe) then - vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q)*(dvel_small(2)) - vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(-2._wp*dvel_small(1))/3._wp - end if + if (q > vidxb) then + vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q)*(dvel_small(2)) + vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(-2._wp*dvel_small(1))/3._wp + end if + if (q < vidxe) then + vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q)*(dvel_small(2)) + vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(-2._wp*dvel_small(1))/3._wp + end if - !y-direction contributions - $:GPU_LOOP(parallelism='[seq]') - do i = -1, 1 - rho_L = 0._wp + !y-direction contributions $:GPU_LOOP(parallelism='[seq]') - do r = 1, num_fluids - rho_L = rho_L + q_cons_vf(r)%sf(j, k + i + q, l) + do i = -1, 1 + rho_L = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do r = 1, num_fluids + rho_L = rho_L + q_cons_vf(r)%sf(j, k + i + q, l) + end do + rho_sf_small(i) = rho_L end do - rho_sf_small(i) = rho_L - end do - dvel_small(1) = (1/(2._wp*dy(k)))*( & - q_cons_vf(momxb)%sf(j, k + 1 + q, l)/rho_sf_small(1) - & - q_cons_vf(momxb)%sf(j, k - 1 + q, l)/rho_sf_small(-1)) - dvel_small(2) = (1/(2._wp*dy(k)))*( & - q_cons_vf(momxb + 1)%sf(j, k + 1 + q, l)/rho_sf_small(1) - & - q_cons_vf(momxb + 1)%sf(j, k - 1 + q, l)/rho_sf_small(-1)) - - if (q > vidxb) then - vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q)*(dvel_small(1)) - vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(4._wp*dvel_small(2))/3._wp - end if - if (q < vidxe) then - vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q)*(dvel_small(1)) - vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(4._wp*dvel_small(2))/3._wp - end if - end do - end if + dvel_small(1) = (1/(2._wp*dy(k)))*( & + q_cons_vf(momxb)%sf(j, k + 1 + q, l)/rho_sf_small(1) - & + q_cons_vf(momxb)%sf(j, k - 1 + q, l)/rho_sf_small(-1)) + dvel_small(2) = (1/(2._wp*dy(k)))*( & + q_cons_vf(momxb + 1)%sf(j, k + 1 + q, l)/rho_sf_small(1) - & + q_cons_vf(momxb + 1)%sf(j, k - 1 + q, l)/rho_sf_small(-1)) - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_L(i) = 0._wp - alpha_rho_R(i) = 0._wp - alpha_L(i) = 0._wp - alpha_R(i) = 0._wp - end do - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_L(i) = 0._wp - vel_R(i) = 0._wp - end do + if (q > vidxb) then + vflux_L_arr(1) = vflux_L_arr(1) + coeff_L(q)*(dvel_small(1)) + vflux_L_arr(3) = vflux_L_arr(3) + coeff_L(q)*(4._wp*dvel_small(2))/3._wp + end if + if (q < vidxe) then + vflux_R_arr(1) = vflux_R_arr(1) + coeff_R(q)*(dvel_small(1)) + vflux_R_arr(3) = vflux_R_arr(3) + coeff_R(q)*(4._wp*dvel_small(2))/3._wp + end if + end do + end if - $:GPU_LOOP(parallelism='[seq]') - do q = vidxb + 1, vidxe $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - alpha_rho_L(i) = alpha_rho_L(i) + coeff_L(q)*q_cons_vf(i)%sf(j, k + q, l) + alpha_rho_L(i) = 0._wp + alpha_rho_R(i) = 0._wp + alpha_L(i) = 0._wp + alpha_R(i) = 0._wp end do - - if (num_fluids > 1) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - alpha_L(i) = alpha_L(i) + coeff_L(q)*q_cons_vf(E_idx + i)%sf(j, k + q, l) - end do - else - alpha_L(1) = 1._wp - end if - $:GPU_LOOP(parallelism='[seq]') do i = 1, num_dims - vel_L(i) = vel_L(i) + coeff_L(q)*q_cons_vf(momxb + i - 1)%sf(j, k + q, l) + vel_L(i) = 0._wp + vel_R(i) = 0._wp end do - end do - $:GPU_LOOP(parallelism='[seq]') - do q = vidxb, vidxe - 1 $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - alpha_rho_R(i) = alpha_rho_R(i) + coeff_R(q)*q_cons_vf(i)%sf(j, k + q, l) - end do - - if (num_fluids > 1) then + do q = vidxb + 1, vidxe $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - alpha_R(i) = alpha_R(i) + coeff_R(q)*q_cons_vf(E_idx + i)%sf(j, k + q, l) + do i = 1, num_fluids + alpha_rho_L(i) = alpha_rho_L(i) + coeff_L(q)*q_cons_vf(i)%sf(j, k + q, l) end do - else - alpha_R(1) = 1._wp - end if - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_R(i) = vel_R(i) + coeff_R(q)*q_cons_vf(momxb + i - 1)%sf(j, k + q, l) + if (num_fluids > 1) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids - 1 + alpha_L(i) = alpha_L(i) + coeff_L(q)*q_cons_vf(E_idx + i)%sf(j, k + q, l) + end do + else + alpha_L(1) = 1._wp + end if + + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + vel_L(i) = vel_L(i) + coeff_L(q)*q_cons_vf(momxb + i - 1)%sf(j, k + q, l) + end do end do - end do - if (num_fluids > 1) then + $:GPU_LOOP(parallelism='[seq]') + do q = vidxb, vidxe - 1 + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + alpha_rho_R(i) = alpha_rho_R(i) + coeff_R(q)*q_cons_vf(i)%sf(j, k + q, l) + end do - alpha_L(num_fluids) = 1._wp - alpha_R(num_fluids) = 1._wp + if (num_fluids > 1) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids - 1 + alpha_R(i) = alpha_R(i) + coeff_R(q)*q_cons_vf(E_idx + i)%sf(j, k + q, l) + end do + else + alpha_R(1) = 1._wp + end if - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 - alpha_L(num_fluids) = alpha_L(num_fluids) - alpha_L(i) - alpha_R(num_fluids) = alpha_R(num_fluids) - alpha_R(i) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + vel_R(i) = vel_R(i) + coeff_R(q)*q_cons_vf(momxb + i - 1)%sf(j, k + q, l) + end do end do - end if - rho_L = 0._wp; rho_R = 0._wp - gamma_L = 0._wp; gamma_R = 0._wp - pi_inf_L = 0._wp; pi_inf_R = 0._wp + if (num_fluids > 1) then - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - rho_L = rho_L + alpha_rho_L(i) - gamma_L = gamma_L + alpha_L(i)*gammas(i) - pi_inf_L = pi_inf_L + alpha_L(i)*pi_infs(i) + alpha_L(num_fluids) = 1._wp + alpha_R(num_fluids) = 1._wp - rho_R = rho_R + alpha_rho_R(i) - gamma_R = gamma_R + alpha_R(i)*gammas(i) - pi_inf_R = pi_inf_R + alpha_R(i)*pi_infs(i) - end do + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids - 1 + alpha_L(num_fluids) = alpha_L(num_fluids) - alpha_L(i) + alpha_R(num_fluids) = alpha_R(num_fluids) - alpha_R(i) + end do + end if - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_dims - vel_L(i) = vel_L(i)/rho_L - vel_R(i) = vel_R(i)/rho_R - end do + rho_L = 0._wp; rho_R = 0._wp + gamma_L = 0._wp; gamma_R = 0._wp + pi_inf_L = 0._wp; pi_inf_R = 0._wp - if (viscous) then - mu_L = 0._wp - mu_R = 0._wp $:GPU_LOOP(parallelism='[seq]') do i = 1, num_fluids - mu_L = alpha_L(i)/Res_igr(1, i) + mu_L - mu_R = alpha_R(i)/Res_igr(1, i) + mu_R + rho_L = rho_L + alpha_rho_L(i) + gamma_L = gamma_L + alpha_L(i)*gammas(i) + pi_inf_L = pi_inf_L + alpha_L(i)*pi_infs(i) + + rho_R = rho_R + alpha_rho_R(i) + gamma_R = gamma_R + alpha_R(i)*gammas(i) + pi_inf_R = pi_inf_R + alpha_R(i)*pi_infs(i) end do - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k + 1, l) = rhs_vf(momxb)%sf(j, k + 1, l) - & - real(0.5_wp*dt*mu_L*vflux_L_arr(1)*(1._wp/dy(k + 1)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) - & - real(0.5_wp*dt*mu_L*vflux_L_arr(1)*vel_L(1)*(1._wp/dy(k + 1)), kind=stp) + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_dims + vel_L(i) = vel_L(i)/rho_L + vel_R(i) = vel_R(i)/rho_R + end do - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) + & - real(0.5_wp*dt*mu_L*vflux_L_arr(1)*(1._wp/dy(k)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - real(0.5_wp*dt*mu_L*vflux_L_arr(1)*vel_L(1)*(1._wp/dy(k)), kind=stp) + if (viscous) then + mu_L = 0._wp + mu_R = 0._wp + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids + mu_L = alpha_L(i)/Res_igr(1, i) + mu_L + mu_R = alpha_R(i)/Res_igr(1, i) + mu_R + end do - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k + 1, l) = rhs_vf(momxb)%sf(j, k + 1, l) - & - real(0.5_wp*dt*mu_R*vflux_R_arr(1)*(1._wp/dy(k + 1)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) - & - real(0.5_wp*dt*mu_R*vflux_R_arr(1)*vel_R(1)*(1._wp/dy(k + 1)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k + 1, l) = rhs_vf(momxb)%sf(j, k + 1, l) - & + real(0.5_wp*dt*mu_L*vflux_L_arr(1)*(1._wp/dy(k + 1)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) - & + real(0.5_wp*dt*mu_L*vflux_L_arr(1)*vel_L(1)*(1._wp/dy(k + 1)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) + & - real(0.5_wp*dt*mu_R*vflux_R_arr(1)*(1._wp/dy(k)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - real(0.5_wp*dt*mu_R*vflux_R_arr(1)*vel_R(1)*(1._wp/dy(k)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) + & + real(0.5_wp*dt*mu_L*vflux_L_arr(1)*(1._wp/dy(k)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + real(0.5_wp*dt*mu_L*vflux_L_arr(1)*vel_L(1)*(1._wp/dy(k)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k + 1, l) = rhs_vf(momxb + 1)%sf(j, k + 1, l) - & - real(0.5_wp*dt*mu_L*vflux_L_arr(3)*(1._wp/dy(k + 1)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) - & - real(0.5_wp*dt*mu_L*vflux_L_arr(3)*vel_L(2)*(1._wp/dy(k + 1)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k + 1, l) = rhs_vf(momxb)%sf(j, k + 1, l) - & + real(0.5_wp*dt*mu_R*vflux_R_arr(1)*(1._wp/dy(k + 1)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) - & + real(0.5_wp*dt*mu_R*vflux_R_arr(1)*vel_R(1)*(1._wp/dy(k + 1)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & - real(0.5_wp*dt*mu_L*vflux_L_arr(3)*(1._wp/dy(k)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - real(0.5_wp*dt*mu_L*vflux_L_arr(3)*vel_L(2)*(1._wp/dy(k)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) + & + real(0.5_wp*dt*mu_R*vflux_R_arr(1)*(1._wp/dy(k)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + real(0.5_wp*dt*mu_R*vflux_R_arr(1)*vel_R(1)*(1._wp/dy(k)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k + 1, l) = rhs_vf(momxb + 1)%sf(j, k + 1, l) - & - real(0.5_wp*dt*mu_R*vflux_R_arr(3)*(1._wp/dy(k + 1)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) - & - real(0.5_wp*dt*mu_R*vflux_R_arr(3)*vel_R(2)*(1._wp/dy(k + 1)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j, k + 1, l) = rhs_vf(momxb + 1)%sf(j, k + 1, l) - & + real(0.5_wp*dt*mu_L*vflux_L_arr(3)*(1._wp/dy(k + 1)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) - & + real(0.5_wp*dt*mu_L*vflux_L_arr(3)*vel_L(2)*(1._wp/dy(k + 1)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & - real(0.5_wp*dt*mu_R*vflux_R_arr(3)*(1._wp/dy(k)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & - real(0.5_wp*dt*mu_R*vflux_R_arr(3)*vel_R(2)*(1._wp/dy(k)), kind=stp) - end if + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & + real(0.5_wp*dt*mu_L*vflux_L_arr(3)*(1._wp/dy(k)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + real(0.5_wp*dt*mu_L*vflux_L_arr(3)*vel_L(2)*(1._wp/dy(k)), kind=stp) - E_L = 0._wp; E_R = 0._wp - F_L = 0._wp; F_R = 0._wp + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j, k + 1, l) = rhs_vf(momxb + 1)%sf(j, k + 1, l) - & + real(0.5_wp*dt*mu_R*vflux_R_arr(3)*(1._wp/dy(k + 1)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) - & + real(0.5_wp*dt*mu_R*vflux_R_arr(3)*vel_R(2)*(1._wp/dy(k + 1)), kind=stp) - $:GPU_LOOP(parallelism='[seq]') - do q = vidxb + 1, vidxe - E_L = E_L + coeff_L(q)*q_cons_vf(E_idx)%sf(j, k + q, l) - F_L = F_L + coeff_L(q)*jac(j, k + q, l) - end do + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) + & + real(0.5_wp*dt*mu_R*vflux_R_arr(3)*(1._wp/dy(k)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) + & + real(0.5_wp*dt*mu_R*vflux_R_arr(3)*vel_R(2)*(1._wp/dy(k)), kind=stp) + end if - $:GPU_LOOP(parallelism='[seq]') - do q = vidxb, vidxe - 1 - E_R = E_R + coeff_R(q)*q_cons_vf(E_idx)%sf(j, k + q, l) - F_R = F_R + coeff_R(q)*jac(j, k + q, l) - end do + E_L = 0._wp; E_R = 0._wp + F_L = 0._wp; F_R = 0._wp - call s_get_derived_states(E_L, gamma_L, pi_inf_L, rho_L, vel_L, & - E_R, gamma_R, pi_inf_R, rho_R, vel_R, & - pres_L, pres_R, cfl) + $:GPU_LOOP(parallelism='[seq]') + do q = vidxb + 1, vidxe + E_L = E_L + coeff_L(q)*q_cons_vf(E_idx)%sf(j, k + q, l) + F_L = F_L + coeff_L(q)*jac(j, k + q, l) + end do - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - $:GPU_ATOMIC(atomic='update') - rhs_vf(i)%sf(j, k + 1, l) = rhs_vf(i)%sf(j, k + 1, l) + & - real(0.5_wp*dt*(alpha_rho_L(i)* & - vel_L(2))*(1._wp/dy(k + 1)) - & - 0.5_wp*dt*cfl*(alpha_rho_L(i))*(1._wp/dy(k + 1)), kind=stp) + $:GPU_LOOP(parallelism='[seq]') + do q = vidxb, vidxe - 1 + E_R = E_R + coeff_R(q)*q_cons_vf(E_idx)%sf(j, k + q, l) + F_R = F_R + coeff_R(q)*jac(j, k + q, l) + end do - $:GPU_ATOMIC(atomic='update') - rhs_vf(i)%sf(j, k, l) = rhs_vf(i)%sf(j, k, l) - & - real(0.5_wp*dt*(alpha_rho_L(i)* & - vel_L(2))*(1._wp/dy(k)) - & - 0.5_wp*dt*cfl*(alpha_rho_L(i))*(1._wp/dy(k)), kind=stp) - end do + call s_get_derived_states(E_L, gamma_L, pi_inf_L, rho_L, vel_L, & + E_R, gamma_R, pi_inf_R, rho_R, vel_R, & + pres_L, pres_R, cfl) - if (num_fluids > 1) then $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 + do i = 1, num_fluids $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k + 1, l) = rhs_vf(advxb + i - 1)%sf(j, k + 1, l) + & - real(0.5_wp*dt*(alpha_L(i)* & - vel_L(2))*(1._wp/dy(k + 1)) - & - 0.5_wp*dt*cfl*(alpha_L(i))*(1._wp/dy(k + 1)), kind=stp) + rhs_vf(i)%sf(j, k + 1, l) = rhs_vf(i)%sf(j, k + 1, l) + & + real(0.5_wp*dt*(alpha_rho_L(i)* & + vel_L(2))*(1._wp/dy(k + 1)) - & + 0.5_wp*dt*cfl*(alpha_rho_L(i))*(1._wp/dy(k + 1)), kind=stp) $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k + 1, l) = rhs_vf(advxb + i - 1)%sf(j, k + 1, l) & - - real(0.5_wp*dt*q_cons_vf(advxb + i - 1)%sf(j, k + 1, l)*vel_L(2)*(1._wp/dy(k + 1)), kind=stp) + rhs_vf(i)%sf(j, k, l) = rhs_vf(i)%sf(j, k, l) - & + real(0.5_wp*dt*(alpha_rho_L(i)* & + vel_L(2))*(1._wp/dy(k)) - & + 0.5_wp*dt*cfl*(alpha_rho_L(i))*(1._wp/dy(k)), kind=stp) + end do - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) - & - real(0.5_wp*dt*(alpha_L(i)* & - vel_L(2))*(1._wp/dy(k)) - & - 0.5_wp*dt*cfl*(alpha_L(i))*(1._wp/dy(k)), kind=stp) + if (num_fluids > 1) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids - 1 + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j, k + 1, l) = rhs_vf(advxb + i - 1)%sf(j, k + 1, l) + & + real(0.5_wp*dt*(alpha_L(i)* & + vel_L(2))*(1._wp/dy(k + 1)) - & + 0.5_wp*dt*cfl*(alpha_L(i))*(1._wp/dy(k + 1)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) & - + real(0.5_wp*dt*q_cons_vf(advxb + i - 1)%sf(j, k, l)*vel_L(2)*(1._wp/dy(k)), kind=stp) - end do - end if + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j, k + 1, l) = rhs_vf(advxb + i - 1)%sf(j, k + 1, l) & + - real(0.5_wp*dt*q_cons_vf(advxb + i - 1)%sf(j, k + 1, l)*vel_L(2)*(1._wp/dy(k + 1)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k + 1, l) = rhs_vf(momxb + 1)%sf(j, k + 1, l) + & - real(0.5_wp*dt*(rho_L*(vel_L(2))**2.0 + & - pres_L + F_L)*(1._wp/dy(k + 1)) - & - 0.5_wp*dt*cfl*(rho_L*vel_L(2))*(1._wp/dy(k + 1)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) - & + real(0.5_wp*dt*(alpha_L(i)* & + vel_L(2))*(1._wp/dy(k)) - & + 0.5_wp*dt*cfl*(alpha_L(i))*(1._wp/dy(k)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k + 1, l) = rhs_vf(momxb)%sf(j, k + 1, l) + & - real(0.5_wp*dt*rho_L*vel_L(1)*vel_L(2)*(1._wp/dy(k + 1)) - & - 0.5_wp*dt*cfl*(rho_L*vel_L(1))*(1._wp/dy(k + 1)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) & + + real(0.5_wp*dt*q_cons_vf(advxb + i - 1)%sf(j, k, l)*vel_L(2)*(1._wp/dy(k)), kind=stp) + end do + end if - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) + & - real(0.5_wp*dt*(vel_L(2)*(E_L + & - pres_L + F_L))*(1._wp/dy(k + 1)) - & - 0.5_wp*dt*cfl*(E_L)*(1._wp/dy(k + 1)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j, k + 1, l) = rhs_vf(momxb + 1)%sf(j, k + 1, l) + & + real(0.5_wp*dt*(rho_L*(vel_L(2))**2.0 + & + pres_L + F_L)*(1._wp/dy(k + 1)) - & + 0.5_wp*dt*cfl*(rho_L*vel_L(2))*(1._wp/dy(k + 1)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) - & - real(0.5_wp*dt*(rho_L*(vel_L(2))**2.0 + & - pres_L + F_L)*(1._wp/dy(k)) - & - 0.5_wp*dt*cfl*(rho_L*vel_L(2))*(1._wp/dy(k)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k + 1, l) = rhs_vf(momxb)%sf(j, k + 1, l) + & + real(0.5_wp*dt*rho_L*vel_L(1)*vel_L(2)*(1._wp/dy(k + 1)) - & + 0.5_wp*dt*cfl*(rho_L*vel_L(1))*(1._wp/dy(k + 1)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) - & - real(0.5_wp*dt*rho_L*vel_L(1)*vel_L(2)*(1._wp/dy(k)) - & - 0.5_wp*dt*cfl*(rho_L*vel_L(1))*(1._wp/dy(k)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) + & + real(0.5_wp*dt*(vel_L(2)*(E_L + & + pres_L + F_L))*(1._wp/dy(k + 1)) - & + 0.5_wp*dt*cfl*(E_L)*(1._wp/dy(k + 1)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & - real(0.5_wp*dt*(vel_L(2)*(E_L + & - pres_L + F_L))*(1._wp/dy(k)) - & - 0.5_wp*dt*cfl*(E_L)*(1._wp/dy(k)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) - & + real(0.5_wp*dt*(rho_L*(vel_L(2))**2.0 + & + pres_L + F_L)*(1._wp/dy(k)) - & + 0.5_wp*dt*cfl*(rho_L*vel_L(2))*(1._wp/dy(k)), kind=stp) - $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids $:GPU_ATOMIC(atomic='update') - rhs_vf(i)%sf(j, k + 1, l) = rhs_vf(i)%sf(j, k + 1, l) + & - real(0.5_wp*dt*(alpha_rho_R(i)* & - vel_R(2))*(1._wp/dy(k + 1)) + & - 0.5_wp*dt*cfl*(alpha_rho_R(i))*(1._wp/dy(k + 1)), kind=stp) + rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) - & + real(0.5_wp*dt*rho_L*vel_L(1)*vel_L(2)*(1._wp/dy(k)) - & + 0.5_wp*dt*cfl*(rho_L*vel_L(1))*(1._wp/dy(k)), kind=stp) + $:GPU_ATOMIC(atomic='update') - rhs_vf(i)%sf(j, k, l) = rhs_vf(i)%sf(j, k, l) - & - real(0.5_wp*dt*(alpha_rho_R(i)* & - vel_R(2))*(1._wp/dy(k)) + & - 0.5_wp*dt*cfl*(alpha_rho_R(i))*(1._wp/dy(k)), kind=stp) - end do + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & + real(0.5_wp*dt*(vel_L(2)*(E_L + & + pres_L + F_L))*(1._wp/dy(k)) - & + 0.5_wp*dt*cfl*(E_L)*(1._wp/dy(k)), kind=stp) - if (num_fluids > 1) then $:GPU_LOOP(parallelism='[seq]') - do i = 1, num_fluids - 1 + do i = 1, num_fluids $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k + 1, l) = rhs_vf(advxb + i - 1)%sf(j, k + 1, l) + & - real(0.5_wp*dt*(alpha_R(i)* & - vel_R(2))*(1._wp/dy(k + 1)) + & - 0.5_wp*dt*cfl*(alpha_R(i))*(1._wp/dy(k + 1)), kind=stp) - + rhs_vf(i)%sf(j, k + 1, l) = rhs_vf(i)%sf(j, k + 1, l) + & + real(0.5_wp*dt*(alpha_rho_R(i)* & + vel_R(2))*(1._wp/dy(k + 1)) + & + 0.5_wp*dt*cfl*(alpha_rho_R(i))*(1._wp/dy(k + 1)), kind=stp) $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k + 1, l) = rhs_vf(advxb + i - 1)%sf(j, k + 1, l) & - - real(0.5_wp*dt*q_cons_vf(advxb + i - 1)%sf(j, k + 1, l)*vel_R(2)*(1._wp/dy(k + 1)), kind=stp) + rhs_vf(i)%sf(j, k, l) = rhs_vf(i)%sf(j, k, l) - & + real(0.5_wp*dt*(alpha_rho_R(i)* & + vel_R(2))*(1._wp/dy(k)) + & + 0.5_wp*dt*cfl*(alpha_rho_R(i))*(1._wp/dy(k)), kind=stp) + end do - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) - & - real(0.5_wp*dt*(alpha_R(i)* & - vel_R(2))*(1._wp/dy(k)) + & - 0.5_wp*dt*cfl*(alpha_R(i))*(1._wp/dy(k)), kind=stp) + if (num_fluids > 1) then + $:GPU_LOOP(parallelism='[seq]') + do i = 1, num_fluids - 1 + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j, k + 1, l) = rhs_vf(advxb + i - 1)%sf(j, k + 1, l) + & + real(0.5_wp*dt*(alpha_R(i)* & + vel_R(2))*(1._wp/dy(k + 1)) + & + 0.5_wp*dt*cfl*(alpha_R(i))*(1._wp/dy(k + 1)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) & - + real(0.5_wp*dt*q_cons_vf(advxb + i - 1)%sf(j, k, l)*vel_R(2)*(1._wp/dy(k)), kind=stp) - end do - end if - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k + 1, l) = rhs_vf(momxb + 1)%sf(j, k + 1, l) + & + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j, k + 1, l) = rhs_vf(advxb + i - 1)%sf(j, k + 1, l) & + - real(0.5_wp*dt*q_cons_vf(advxb + i - 1)%sf(j, k + 1, l)*vel_R(2)*(1._wp/dy(k + 1)), kind=stp) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) - & + real(0.5_wp*dt*(alpha_R(i)* & + vel_R(2))*(1._wp/dy(k)) + & + 0.5_wp*dt*cfl*(alpha_R(i))*(1._wp/dy(k)), kind=stp) + + $:GPU_ATOMIC(atomic='update') + rhs_vf(advxb + i - 1)%sf(j, k, l) = rhs_vf(advxb + i - 1)%sf(j, k, l) & + + real(0.5_wp*dt*q_cons_vf(advxb + i - 1)%sf(j, k, l)*vel_R(2)*(1._wp/dy(k)), kind=stp) + end do + end if + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j, k + 1, l) = rhs_vf(momxb + 1)%sf(j, k + 1, l) + & + real(0.5_wp*dt*(rho_R*(vel_R(2))**2.0 + & + pres_R + F_R)*(1._wp/dy(k + 1)) + & + 0.5_wp*dt*cfl*(rho_R*vel_R(2))*(1._wp/dy(k + 1)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k + 1, l) = rhs_vf(momxb)%sf(j, k + 1, l) + & + real(0.5_wp*dt*rho_R*vel_R(2)*vel_R(1)*(1._wp/dy(k + 1)) + & + 0.5_wp*dt*cfl*(rho_R*vel_R(1))*(1._wp/dy(k + 1)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) + & + real(0.5_wp*dt*(vel_R(2)*(E_R + & + pres_R + F_R))*(1._wp/dy(k + 1)) + & + 0.5_wp*dt*cfl*(E_R)*(1._wp/dy(k + 1)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) - & real(0.5_wp*dt*(rho_R*(vel_R(2))**2.0 + & - pres_R + F_R)*(1._wp/dy(k + 1)) + & - 0.5_wp*dt*cfl*(rho_R*vel_R(2))*(1._wp/dy(k + 1)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k + 1, l) = rhs_vf(momxb)%sf(j, k + 1, l) + & - real(0.5_wp*dt*rho_R*vel_R(2)*vel_R(1)*(1._wp/dy(k + 1)) + & - 0.5_wp*dt*cfl*(rho_R*vel_R(1))*(1._wp/dy(k + 1)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k + 1, l) = rhs_vf(E_idx)%sf(j, k + 1, l) + & + pres_R + F_R)*(1._wp/dy(k)) + & + 0.5_wp*dt*cfl*(rho_R*vel_R(2))*(1._wp/dy(k)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) - & + real(0.5_wp*dt*rho_R*vel_R(2)*vel_R(1)*(1._wp/dy(k)) + & + 0.5_wp*dt*cfl*(rho_R*vel_R(1))*(1._wp/dy(k)), kind=stp) + $:GPU_ATOMIC(atomic='update') + rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & real(0.5_wp*dt*(vel_R(2)*(E_R + & - pres_R + F_R))*(1._wp/dy(k + 1)) + & - 0.5_wp*dt*cfl*(E_R)*(1._wp/dy(k + 1)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb + 1)%sf(j, k, l) = rhs_vf(momxb + 1)%sf(j, k, l) - & - real(0.5_wp*dt*(rho_R*(vel_R(2))**2.0 + & - pres_R + F_R)*(1._wp/dy(k)) + & - 0.5_wp*dt*cfl*(rho_R*vel_R(2))*(1._wp/dy(k)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(momxb)%sf(j, k, l) = rhs_vf(momxb)%sf(j, k, l) - & - real(0.5_wp*dt*rho_R*vel_R(2)*vel_R(1)*(1._wp/dy(k)) + & - 0.5_wp*dt*cfl*(rho_R*vel_R(1))*(1._wp/dy(k)), kind=stp) - $:GPU_ATOMIC(atomic='update') - rhs_vf(E_idx)%sf(j, k, l) = rhs_vf(E_idx)%sf(j, k, l) - & - real(0.5_wp*dt*(vel_R(2)*(E_R + & - pres_R + F_R))*(1._wp/dy(k)) + & - 0.5_wp*dt*cfl*(E_R)*(1._wp/dy(k)), kind=stp) + pres_R + F_R))*(1._wp/dy(k)) + & + 0.5_wp*dt*cfl*(E_R)*(1._wp/dy(k)), kind=stp) + end do end do end do - end do - $:END_GPU_PARALLEL_LOOP() + $:END_GPU_PARALLEL_LOOP() #:endif else #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 diff --git a/src/simulation/m_pressure_relaxation.fpp b/src/simulation/m_pressure_relaxation.fpp index d87eab8898..b94871044c 100644 --- a/src/simulation/m_pressure_relaxation.fpp +++ b/src/simulation/m_pressure_relaxation.fpp @@ -146,11 +146,11 @@ contains integer, intent(in) :: j, k, l real(wp) :: pres_relax, f_pres, df_pres -#:if not MFC_CASE_OPTIMIZATION and USING_AMD - real(wp), dimension(3) :: pres_K_init, rho_K_s -#:else - real(wp), dimension(num_fluids) :: pres_K_init, rho_K_s -#:endif + #:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(3) :: pres_K_init, rho_K_s + #:else + real(wp), dimension(num_fluids) :: pres_K_init, rho_K_s + #:endif integer, parameter :: MAX_ITER = 50 real(wp), parameter :: TOLERANCE = 1.e-10_wp integer :: iter, i @@ -217,11 +217,11 @@ contains type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf integer, intent(in) :: j, k, l -#:if not MFC_CASE_OPTIMIZATION and USING_AMD - real(wp), dimension(2) :: alpha_rho, alpha -#:else - real(wp), dimension(num_fluids) :: alpha_rho, alpha -#:endif + #:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(2) :: alpha_rho, alpha + #:else + real(wp), dimension(num_fluids) :: alpha_rho, alpha + #:endif real(wp) :: rho, dyn_pres, gamma, pi_inf, pres_relax, sum_alpha real(wp), dimension(2) :: Re integer :: i, q diff --git a/src/simulation/m_qbmm.fpp b/src/simulation/m_qbmm.fpp index 4eae61eec9..84f95af557 100644 --- a/src/simulation/m_qbmm.fpp +++ b/src/simulation/m_qbmm.fpp @@ -571,11 +571,11 @@ contains & cray_inline=True) real(wp), intent(in) :: pres, rho, c -#:if USING_AMD - real(wp), dimension(32, 0:2, 0:2), intent(out) :: coeffs -#:else - real(wp), dimension(nterms, 0:2, 0:2), intent(out) :: coeffs -#:endif + #:if USING_AMD + real(wp), dimension(32, 0:2, 0:2), intent(out) :: coeffs + #:else + real(wp), dimension(nterms, 0:2, 0:2), intent(out) :: coeffs + #:endif integer :: i1, i2 @@ -650,11 +650,11 @@ contains & cray_inline=True) real(wp), intent(in) :: pres, rho, c -#:if USING_AMD - real(wp), dimension(32, 0:2, 0:2), intent(out) :: coeffs -#:else - real(wp), dimension(nterms, 0:2, 0:2), intent(out) :: coeffs -#:endif + #:if USING_AMD + real(wp), dimension(32, 0:2, 0:2), intent(out) :: coeffs + #:else + real(wp), dimension(nterms, 0:2, 0:2), intent(out) :: coeffs + #:endif integer :: i1, i2 @@ -723,18 +723,18 @@ contains real(stp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: mv real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: rhs_mv type(int_bounds_info), intent(in) :: ix, iy, iz -#:if not MFC_CASE_OPTIMIZATION and USING_AMD - real(wp), dimension(6) :: moms, msum - real(wp), dimension(4, 3) :: wght, abscX, abscY, wght_pb, wght_mv, wght_ht, ht -#:else - real(wp), dimension(nmom) :: moms, msum - real(wp), dimension(nnode, nb) :: wght, abscX, abscY, wght_pb, wght_mv, wght_ht, ht -#:endif -#:if USING_AMD - real(wp), dimension(32, 0:2, 0:2) :: coeff -#:else - real(wp), dimension(nterms, 0:2, 0:2) :: coeff -#:endif + #:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(6) :: moms, msum + real(wp), dimension(4, 3) :: wght, abscX, abscY, wght_pb, wght_mv, wght_ht, ht + #:else + real(wp), dimension(nmom) :: moms, msum + real(wp), dimension(nnode, nb) :: wght, abscX, abscY, wght_pb, wght_mv, wght_ht, ht + #:endif + #:if USING_AMD + real(wp), dimension(32, 0:2, 0:2) :: coeff + #:else + real(wp), dimension(nterms, 0:2, 0:2) :: coeff + #:endif real(wp) :: pres, rho, nbub, c, alf, momsum, drdt, drdt2, chi_vw, x_vw, rho_mw, k_mw, grad_T real(wp) :: n_tait, B_tait integer :: id1, id2, id3, i1, i2, j, q, r @@ -887,11 +887,11 @@ contains $:GPU_ROUTINE(function_name='s_coeff_selector',parallelism='[seq]', & & cray_inline=True) real(wp), intent(in) :: pres, rho, c -#:if USING_AMD - real(wp), dimension(32, 0:2, 0:2), intent(out) :: coeff -#:else - real(wp), dimension(nterms, 0:2, 0:2), intent(out) :: coeff -#:endif + #:if USING_AMD + real(wp), dimension(32, 0:2, 0:2), intent(out) :: coeff + #:else + real(wp), dimension(nterms, 0:2, 0:2), intent(out) :: coeff + #:endif logical, intent(in) :: polytropic if (polytropic) then call s_coeff(pres, rho, c, coeff) @@ -980,11 +980,11 @@ contains function f_quad(abscX, abscY, wght_in, q, r, s) $:GPU_ROUTINE(parallelism='[seq]') -#:if not MFC_CASE_OPTIMIZATION and USING_AMD - real(wp), dimension(4, 3), intent(in) :: abscX, abscY, wght_in -#:else - real(wp), dimension(nnode, nb), intent(in) :: abscX, abscY, wght_in -#:endif + #:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(4, 3), intent(in) :: abscX, abscY, wght_in + #:else + real(wp), dimension(nnode, nb), intent(in) :: abscX, abscY, wght_in + #:endif real(wp), intent(in) :: q, r, s real(wp) :: f_quad_RV, f_quad @@ -1005,11 +1005,11 @@ contains function f_quad2D(abscX, abscY, wght_in, pow) $:GPU_ROUTINE(parallelism='[seq]') -#:if not MFC_CASE_OPTIMIZATION and USING_AMD - real(wp), dimension(4), intent(in) :: abscX, abscY, wght_in -#:else - real(wp), dimension(nnode), intent(in) :: abscX, abscY, wght_in -#:endif + #:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(4), intent(in) :: abscX, abscY, wght_in + #:else + real(wp), dimension(nnode), intent(in) :: abscX, abscY, wght_in + #:endif real(wp), dimension(3), intent(in) :: pow real(wp) :: f_quad2D diff --git a/src/simulation/m_rhs.fpp b/src/simulation/m_rhs.fpp index 9689be4db0..e5788b4215 100644 --- a/src/simulation/m_rhs.fpp +++ b/src/simulation/m_rhs.fpp @@ -702,7 +702,7 @@ contains call s_populate_variables_buffers(bc_type, q_cons_vf, pb_in, mv_in) call nvtxEndRange end if - if(.not. igr .or. dummy) then + if (.not. igr .or. dummy) then call nvtxStartRange("RHS-CONVERT") call s_convert_conservative_to_primitive_variables( & q_cons_qp%vf, & @@ -780,8 +780,8 @@ contains call s_igr_sigma_x(q_cons_vf, rhs_vf) call nvtxEndRange end if - end if - if((.not. igr) .or. dummy) then! Finite volume solve + end if + if ((.not. igr) .or. dummy) then! Finite volume solve ! Reconstructing Primitive/Conservative Variables call nvtxStartRange("RHS-WENO") @@ -1953,7 +1953,7 @@ contains end if #:endfor - if (recon_dir == 1) then + if (recon_dir == 1) then $:GPU_PARALLEL_LOOP(private='[i,j,k,l]', collapse=4) do i = iv%beg, iv%end do l = is3%beg, is3%end diff --git a/src/simulation/m_riemann_solvers.fpp b/src/simulation/m_riemann_solvers.fpp index d8b5792fe0..b93b342ea5 100644 --- a/src/simulation/m_riemann_solvers.fpp +++ b/src/simulation/m_riemann_solvers.fpp @@ -291,22 +291,22 @@ contains integer, intent(in) :: norm_dir type(int_bounds_info), intent(in) :: ix, iy, iz -#:if not MFC_CASE_OPTIMIZATION and USING_AMD - real(wp), dimension(3) :: alpha_rho_L, alpha_rho_R - real(wp), dimension(3) :: vel_L, vel_R - real(wp), dimension(3) :: alpha_L, alpha_R - real(wp), dimension(10) :: Ys_L, Ys_R - real(wp), dimension(10) :: Cp_iL, Cp_iR, Xs_L, Xs_R, Gamma_iL, Gamma_iR - real(wp), dimension(10) :: Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2 -#:else - real(wp), dimension(num_fluids) :: alpha_rho_L, alpha_rho_R - real(wp), dimension(num_vels) :: vel_L, vel_R - real(wp), dimension(num_fluids) :: alpha_L, alpha_R - real(wp), dimension(num_species) :: Ys_L, Ys_R - real(wp), dimension(num_species) :: Cp_iL, Cp_iR, Xs_L, Xs_R, Gamma_iL, Gamma_iR - real(wp), dimension(num_species) :: Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2 -#:endif - real(wp) :: rho_L, rho_R + #:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(3) :: alpha_rho_L, alpha_rho_R + real(wp), dimension(3) :: vel_L, vel_R + real(wp), dimension(3) :: alpha_L, alpha_R + real(wp), dimension(10) :: Ys_L, Ys_R + real(wp), dimension(10) :: Cp_iL, Cp_iR, Xs_L, Xs_R, Gamma_iL, Gamma_iR + real(wp), dimension(10) :: Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2 + #:else + real(wp), dimension(num_fluids) :: alpha_rho_L, alpha_rho_R + real(wp), dimension(num_vels) :: vel_L, vel_R + real(wp), dimension(num_fluids) :: alpha_L, alpha_R + real(wp), dimension(num_species) :: Ys_L, Ys_R + real(wp), dimension(num_species) :: Cp_iL, Cp_iR, Xs_L, Xs_R, Gamma_iL, Gamma_iR + real(wp), dimension(num_species) :: Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2 + #:endif + real(wp) :: rho_L, rho_R real(wp) :: pres_L, pres_R real(wp) :: E_L, E_R real(wp) :: H_L, H_R @@ -493,13 +493,13 @@ contains call get_mixture_molecular_weight(Ys_L, MW_L) call get_mixture_molecular_weight(Ys_R, MW_R) -#:if USING_AMD - Xs_L(:) = Ys_L(:)*MW_L/molecular_weights_nonparameter(:) - Xs_R(:) = Ys_R(:)*MW_R/molecular_weights_nonparameter(:) -#:else - Xs_L(:) = Ys_L(:)*MW_L/molecular_weights(:) - Xs_R(:) = Ys_R(:)*MW_R/molecular_weights(:) -#:endif + #:if USING_AMD + Xs_L(:) = Ys_L(:)*MW_L/molecular_weights_nonparameter(:) + Xs_R(:) = Ys_R(:)*MW_R/molecular_weights_nonparameter(:) + #:else + Xs_L(:) = Ys_L(:)*MW_L/molecular_weights(:) + Xs_R(:) = Ys_R(:)*MW_R/molecular_weights(:) + #:endif R_gas_L = gas_constant/MW_L R_gas_R = gas_constant/MW_R @@ -1068,25 +1068,25 @@ contains integer, intent(in) :: norm_dir type(int_bounds_info), intent(in) :: ix, iy, iz -#:if not MFC_CASE_OPTIMIZATION and USING_AMD - real(wp), dimension(3) :: alpha_rho_L, alpha_rho_R - real(wp), dimension(3) :: vel_L, vel_R - real(wp), dimension(3) :: alpha_L, alpha_R - real(wp), dimension(10) :: Ys_L, Ys_R - real(wp), dimension(10) :: Cp_iL, Cp_iR, Xs_L, Xs_R, Gamma_iL, Gamma_iR - real(wp), dimension(10) :: Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2 - real(wp), dimension(3, 3) :: vel_grad_L, vel_grad_R !< Averaged velocity gradient tensor `d(vel_i)/d(coord_j)`. -#:else - real(wp), dimension(num_fluids) :: alpha_rho_L, alpha_rho_R - real(wp), dimension(num_vels) :: vel_L, vel_R - real(wp), dimension(num_fluids) :: alpha_L, alpha_R - real(wp), dimension(num_species) :: Ys_L, Ys_R - real(wp), dimension(num_species) :: Cp_iL, Cp_iR, Xs_L, Xs_R, Gamma_iL, Gamma_iR - real(wp), dimension(num_species) :: Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2 - real(wp), dimension(num_dims, num_dims) :: vel_grad_L, vel_grad_R !< Averaged velocity gradient tensor `d(vel_i)/d(coord_j)`. -#:endif + #:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(3) :: alpha_rho_L, alpha_rho_R + real(wp), dimension(3) :: vel_L, vel_R + real(wp), dimension(3) :: alpha_L, alpha_R + real(wp), dimension(10) :: Ys_L, Ys_R + real(wp), dimension(10) :: Cp_iL, Cp_iR, Xs_L, Xs_R, Gamma_iL, Gamma_iR + real(wp), dimension(10) :: Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2 + real(wp), dimension(3, 3) :: vel_grad_L, vel_grad_R !< Averaged velocity gradient tensor `d(vel_i)/d(coord_j)`. + #:else + real(wp), dimension(num_fluids) :: alpha_rho_L, alpha_rho_R + real(wp), dimension(num_vels) :: vel_L, vel_R + real(wp), dimension(num_fluids) :: alpha_L, alpha_R + real(wp), dimension(num_species) :: Ys_L, Ys_R + real(wp), dimension(num_species) :: Cp_iL, Cp_iR, Xs_L, Xs_R, Gamma_iL, Gamma_iR + real(wp), dimension(num_species) :: Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2 + real(wp), dimension(num_dims, num_dims) :: vel_grad_L, vel_grad_R !< Averaged velocity gradient tensor `d(vel_i)/d(coord_j)`. + #:endif real(wp) :: rho_L, rho_R - + real(wp) :: pres_L, pres_R real(wp) :: E_L, E_R real(wp) :: H_L, H_R @@ -1274,13 +1274,13 @@ contains call get_mixture_molecular_weight(Ys_L, MW_L) call get_mixture_molecular_weight(Ys_R, MW_R) -#:if USING_AMD - Xs_L(:) = Ys_L(:)*MW_L/molecular_weights_nonparameter(:) - Xs_R(:) = Ys_R(:)*MW_R/molecular_weights_nonparameter(:) -#:else - Xs_L(:) = Ys_L(:)*MW_L/molecular_weights(:) - Xs_R(:) = Ys_R(:)*MW_R/molecular_weights(:) -#:endif + #:if USING_AMD + Xs_L(:) = Ys_L(:)*MW_L/molecular_weights_nonparameter(:) + Xs_R(:) = Ys_R(:)*MW_R/molecular_weights_nonparameter(:) + #:else + Xs_L(:) = Ys_L(:)*MW_L/molecular_weights(:) + Xs_R(:) = Ys_R(:)*MW_R/molecular_weights(:) + #:endif R_gas_L = gas_constant/MW_L R_gas_R = gas_constant/MW_R @@ -1319,28 +1319,28 @@ contains H_R = (E_R + pres_R)/rho_R elseif (mhd .and. relativity) then #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 - Ga%L = 1._wp/sqrt(1._wp - vel_L_rms) - Ga%R = 1._wp/sqrt(1._wp - vel_R_rms) - vdotB%L = vel_L(1)*B%L(1) + vel_L(2)*B%L(2) + vel_L(3)*B%L(3) - vdotB%R = vel_R(1)*B%R(1) + vel_R(2)*B%R(2) + vel_R(3)*B%R(3) + Ga%L = 1._wp/sqrt(1._wp - vel_L_rms) + Ga%R = 1._wp/sqrt(1._wp - vel_R_rms) + vdotB%L = vel_L(1)*B%L(1) + vel_L(2)*B%L(2) + vel_L(3)*B%L(3) + vdotB%R = vel_R(1)*B%R(1) + vel_R(2)*B%R(2) + vel_R(3)*B%R(3) - b4%L(1:3) = B%L(1:3)/Ga%L + Ga%L*vel_L(1:3)*vdotB%L - b4%R(1:3) = B%R(1:3)/Ga%R + Ga%R*vel_R(1:3)*vdotB%R - B2%L = B%L(1)**2._wp + B%L(2)**2._wp + B%L(3)**2._wp - B2%R = B%R(1)**2._wp + B%R(2)**2._wp + B%R(3)**2._wp + b4%L(1:3) = B%L(1:3)/Ga%L + Ga%L*vel_L(1:3)*vdotB%L + b4%R(1:3) = B%R(1:3)/Ga%R + Ga%R*vel_R(1:3)*vdotB%R + B2%L = B%L(1)**2._wp + B%L(2)**2._wp + B%L(3)**2._wp + B2%R = B%R(1)**2._wp + B%R(2)**2._wp + B%R(3)**2._wp - pres_mag%L = 0.5_wp*(B2%L/Ga%L**2._wp + vdotB%L**2._wp) - pres_mag%R = 0.5_wp*(B2%R/Ga%R**2._wp + vdotB%R**2._wp) + pres_mag%L = 0.5_wp*(B2%L/Ga%L**2._wp + vdotB%L**2._wp) + pres_mag%R = 0.5_wp*(B2%R/Ga%R**2._wp + vdotB%R**2._wp) - ! Hard-coded EOS - H_L = 1._wp + (gamma_L + 1)*pres_L/rho_L - H_R = 1._wp + (gamma_R + 1)*pres_R/rho_R + ! Hard-coded EOS + H_L = 1._wp + (gamma_L + 1)*pres_L/rho_L + H_R = 1._wp + (gamma_R + 1)*pres_R/rho_R - cm%L(1:3) = (rho_L*H_L*Ga%L**2 + B2%L)*vel_L(1:3) - vdotB%L*B%L(1:3) - cm%R(1:3) = (rho_R*H_R*Ga%R**2 + B2%R)*vel_R(1:3) - vdotB%R*B%R(1:3) + cm%L(1:3) = (rho_L*H_L*Ga%L**2 + B2%L)*vel_L(1:3) - vdotB%L*B%L(1:3) + cm%R(1:3) = (rho_R*H_R*Ga%R**2 + B2%R)*vel_R(1:3) - vdotB%R*B%R(1:3) - E_L = rho_L*H_L*Ga%L**2 - pres_L + 0.5_wp*(B2%L + vel_L_rms*B2%L - vdotB%L**2._wp) - rho_L*Ga%L - E_R = rho_R*H_R*Ga%R**2 - pres_R + 0.5_wp*(B2%R + vel_R_rms*B2%R - vdotB%R**2._wp) - rho_R*Ga%R + E_L = rho_L*H_L*Ga%L**2 - pres_L + 0.5_wp*(B2%L + vel_L_rms*B2%L - vdotB%L**2._wp) - rho_L*Ga%L + E_R = rho_R*H_R*Ga%R**2 - pres_R + 0.5_wp*(B2%R + vel_R_rms*B2%R - vdotB%R**2._wp) - rho_R*Ga%R #:endif elseif (mhd .and. .not. relativity) then pres_mag%L = 0.5_wp*(B%L(1)**2._wp + B%L(2)**2._wp + B%L(3)**2._wp) @@ -1529,11 +1529,11 @@ contains if (mhd .and. (.not. relativity)) then ! energy flux = (E + p + p_mag) * v_${XYZ}$ - B_${XYZ}$ * (v_x*B_x + v_y*B_y + v_z*B_z) #:if not MFC_CASE_OPTIMIZATION or num_dims > 1 - flux_rs${XYZ}$_vf(j, k, l, E_idx) = & - (s_M*(vel_R(norm_dir)*(E_R + pres_R + pres_mag%R) - B%R(norm_dir)*(vel_R(1)*B%R(1) + vel_R(2)*B%R(2) + vel_R(3)*B%R(3))) & - - s_P*(vel_L(norm_dir)*(E_L + pres_L + pres_mag%L) - B%L(norm_dir)*(vel_L(1)*B%L(1) + vel_L(2)*B%L(2) + vel_L(3)*B%L(3))) & - + s_M*s_P*(E_L - E_R)) & - /(s_M - s_P) + flux_rs${XYZ}$_vf(j, k, l, E_idx) = & + (s_M*(vel_R(norm_dir)*(E_R + pres_R + pres_mag%R) - B%R(norm_dir)*(vel_R(1)*B%R(1) + vel_R(2)*B%R(2) + vel_R(3)*B%R(3))) & + - s_P*(vel_L(norm_dir)*(E_L + pres_L + pres_mag%L) - B%L(norm_dir)*(vel_L(1)*B%L(1) + vel_L(2)*B%L(2) + vel_L(3)*B%L(3))) & + + s_M*s_P*(E_L - E_R)) & + /(s_M - s_P) #:endif elseif (mhd .and. relativity) then ! energy flux = m_${XYZ}$ - mass flux @@ -1756,16 +1756,16 @@ contains vel_grad_L(i, 1) = (dqL_prim_dx_vf(momxb + i - 1)%sf(j, k, l)/Re_L(1)) vel_grad_R(i, 1) = (dqR_prim_dx_vf(momxb + i - 1)%sf(idx_right_phys(1), idx_right_phys(2), idx_right_phys(3))/Re_R(1)) #:if not MFC_CASE_OPTIMIZATION or num_dims > 1 - if (num_dims > 1) then - vel_grad_L(i, 2) = (dqL_prim_dy_vf(momxb + i - 1)%sf(j, k, l)/Re_L(1)) - vel_grad_R(i, 2) = (dqR_prim_dy_vf(momxb + i - 1)%sf(idx_right_phys(1), idx_right_phys(2), idx_right_phys(3))/Re_R(1)) - end if - #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 - if (num_dims > 2) then - vel_grad_L(i, 3) = (dqL_prim_dz_vf(momxb + i - 1)%sf(j, k, l)/Re_L(1)) - vel_grad_R(i, 3) = (dqR_prim_dz_vf(momxb + i - 1)%sf(idx_right_phys(1), idx_right_phys(2), idx_right_phys(3))/Re_R(1)) - end if - #:endif + if (num_dims > 1) then + vel_grad_L(i, 2) = (dqL_prim_dy_vf(momxb + i - 1)%sf(j, k, l)/Re_L(1)) + vel_grad_R(i, 2) = (dqR_prim_dy_vf(momxb + i - 1)%sf(idx_right_phys(1), idx_right_phys(2), idx_right_phys(3))/Re_R(1)) + end if + #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 + if (num_dims > 2) then + vel_grad_L(i, 3) = (dqL_prim_dz_vf(momxb + i - 1)%sf(j, k, l)/Re_L(1)) + vel_grad_R(i, 3) = (dqR_prim_dz_vf(momxb + i - 1)%sf(idx_right_phys(1), idx_right_phys(2), idx_right_phys(3))/Re_R(1)) + end if + #:endif #:endif end do @@ -1773,60 +1773,60 @@ contains flux_src_vf(momxb)%sf(j, k, l) = flux_src_vf(momxb)%sf(j, k, l) - (4._wp/3._wp)*0.5_wp*(vel_grad_L(1, 1) + vel_grad_R(1, 1)) flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - (4._wp/3._wp)*0.5_wp*(vel_grad_L(1, 1)*vel_L(1) + vel_grad_R(1, 1)*vel_R(1)) #:if not MFC_CASE_OPTIMIZATION or num_dims > 1 - if (num_dims > 1) then - flux_src_vf(momxb)%sf(j, k, l) = flux_src_vf(momxb)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(2, 2) + vel_grad_R(2, 2)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(2, 2)*vel_L(1) + vel_grad_R(2, 2)*vel_R(1)) - - flux_src_vf(momxb + 1)%sf(j, k, l) = flux_src_vf(momxb + 1)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 2) + vel_grad_R(1, 2)) - 0.5_wp*(vel_grad_L(2, 1) + vel_grad_R(2, 1)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 2)*vel_L(2) + vel_grad_R(1, 2)*vel_R(2)) - 0.5_wp*(vel_grad_L(2, 1)*vel_L(2) + vel_grad_R(2, 1)*vel_R(2)) - #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 - if (num_dims > 2) then - flux_src_vf(momxb)%sf(j, k, l) = flux_src_vf(momxb)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(3, 3) + vel_grad_R(3, 3)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(3, 3)*vel_L(1) + vel_grad_R(3, 3)*vel_R(1)) - - flux_src_vf(momxb + 2)%sf(j, k, l) = flux_src_vf(momxb + 2)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 3) + vel_grad_R(1, 3)) - 0.5_wp*(vel_grad_L(3, 1) + vel_grad_R(3, 1)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 3)*vel_L(3) + vel_grad_R(1, 3)*vel_R(3)) - 0.5_wp*(vel_grad_L(3, 1)*vel_L(3) + vel_grad_R(3, 1)*vel_R(3)) + if (num_dims > 1) then + flux_src_vf(momxb)%sf(j, k, l) = flux_src_vf(momxb)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(2, 2) + vel_grad_R(2, 2)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(2, 2)*vel_L(1) + vel_grad_R(2, 2)*vel_R(1)) + + flux_src_vf(momxb + 1)%sf(j, k, l) = flux_src_vf(momxb + 1)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 2) + vel_grad_R(1, 2)) - 0.5_wp*(vel_grad_L(2, 1) + vel_grad_R(2, 1)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 2)*vel_L(2) + vel_grad_R(1, 2)*vel_R(2)) - 0.5_wp*(vel_grad_L(2, 1)*vel_L(2) + vel_grad_R(2, 1)*vel_R(2)) + #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 + if (num_dims > 2) then + flux_src_vf(momxb)%sf(j, k, l) = flux_src_vf(momxb)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(3, 3) + vel_grad_R(3, 3)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(3, 3)*vel_L(1) + vel_grad_R(3, 3)*vel_R(1)) + + flux_src_vf(momxb + 2)%sf(j, k, l) = flux_src_vf(momxb + 2)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 3) + vel_grad_R(1, 3)) - 0.5_wp*(vel_grad_L(3, 1) + vel_grad_R(3, 1)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 3)*vel_L(3) + vel_grad_R(1, 3)*vel_R(3)) - 0.5_wp*(vel_grad_L(3, 1)*vel_L(3) + vel_grad_R(3, 1)*vel_R(3)) + end if + #:endif end if - #:endif - end if #:endif else if (norm_dir == 2) then #:if not MFC_CASE_OPTIMIZATION or num_dims > 1 - flux_src_vf(momxb + 1)%sf(j, k, l) = flux_src_vf(momxb + 1)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(1, 1) + vel_grad_R(1, 1)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(1, 1)*vel_L(2) + vel_grad_R(1, 1)*vel_R(2)) + flux_src_vf(momxb + 1)%sf(j, k, l) = flux_src_vf(momxb + 1)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(1, 1) + vel_grad_R(1, 1)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(1, 1)*vel_L(2) + vel_grad_R(1, 1)*vel_R(2)) - flux_src_vf(momxb + 1)%sf(j, k, l) = flux_src_vf(momxb + 1)%sf(j, k, l) - (4._wp/3._wp)*0.5_wp*(vel_grad_L(2, 2) + vel_grad_R(2, 2)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - (4._wp/3._wp)*0.5_wp*(vel_grad_L(2, 2)*vel_L(2) + vel_grad_R(2, 2)*vel_R(2)) + flux_src_vf(momxb + 1)%sf(j, k, l) = flux_src_vf(momxb + 1)%sf(j, k, l) - (4._wp/3._wp)*0.5_wp*(vel_grad_L(2, 2) + vel_grad_R(2, 2)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - (4._wp/3._wp)*0.5_wp*(vel_grad_L(2, 2)*vel_L(2) + vel_grad_R(2, 2)*vel_R(2)) - flux_src_vf(momxb)%sf(j, k, l) = flux_src_vf(momxb)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 2) + vel_grad_R(1, 2)) - 0.5_wp*(vel_grad_L(2, 1) + vel_grad_R(2, 1)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 2)*vel_L(1) + vel_grad_R(1, 2)*vel_R(1)) - 0.5_wp*(vel_grad_L(2, 1)*vel_L(1) + vel_grad_R(2, 1)*vel_R(1)) - #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 - if (num_dims > 2) then - flux_src_vf(momxb + 1)%sf(j, k, l) = flux_src_vf(momxb + 1)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(3, 3) + vel_grad_R(3, 3)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(3, 3)*vel_L(2) + vel_grad_R(3, 3)*vel_R(2)) + flux_src_vf(momxb)%sf(j, k, l) = flux_src_vf(momxb)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 2) + vel_grad_R(1, 2)) - 0.5_wp*(vel_grad_L(2, 1) + vel_grad_R(2, 1)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 2)*vel_L(1) + vel_grad_R(1, 2)*vel_R(1)) - 0.5_wp*(vel_grad_L(2, 1)*vel_L(1) + vel_grad_R(2, 1)*vel_R(1)) + #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 + if (num_dims > 2) then + flux_src_vf(momxb + 1)%sf(j, k, l) = flux_src_vf(momxb + 1)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(3, 3) + vel_grad_R(3, 3)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(3, 3)*vel_L(2) + vel_grad_R(3, 3)*vel_R(2)) - flux_src_vf(momxb + 2)%sf(j, k, l) = flux_src_vf(momxb + 2)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 3) + vel_grad_R(2, 3)) - 0.5_wp*(vel_grad_L(3, 2) + vel_grad_R(3, 2)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 3)*vel_L(3) + vel_grad_R(2, 3)*vel_R(3)) - 0.5_wp*(vel_grad_L(3, 2)*vel_L(3) + vel_grad_R(3, 2)*vel_R(3)) - end if - #:endif + flux_src_vf(momxb + 2)%sf(j, k, l) = flux_src_vf(momxb + 2)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 3) + vel_grad_R(2, 3)) - 0.5_wp*(vel_grad_L(3, 2) + vel_grad_R(3, 2)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 3)*vel_L(3) + vel_grad_R(2, 3)*vel_R(3)) - 0.5_wp*(vel_grad_L(3, 2)*vel_L(3) + vel_grad_R(3, 2)*vel_R(3)) + end if + #:endif #:endif else #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 - flux_src_vf(momxb + 2)%sf(j, k, l) = flux_src_vf(momxb + 2)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(1, 1) + vel_grad_R(1, 1)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(1, 1)*vel_L(3) + vel_grad_R(1, 1)*vel_R(3)) + flux_src_vf(momxb + 2)%sf(j, k, l) = flux_src_vf(momxb + 2)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(1, 1) + vel_grad_R(1, 1)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(1, 1)*vel_L(3) + vel_grad_R(1, 1)*vel_R(3)) - flux_src_vf(momxb + 2)%sf(j, k, l) = flux_src_vf(momxb + 2)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(2, 2) + vel_grad_R(2, 2)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(2, 2)*vel_L(3) + vel_grad_R(2, 2)*vel_R(3)) + flux_src_vf(momxb + 2)%sf(j, k, l) = flux_src_vf(momxb + 2)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(2, 2) + vel_grad_R(2, 2)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - (-2._wp/3._wp)*0.5_wp*(vel_grad_L(2, 2)*vel_L(3) + vel_grad_R(2, 2)*vel_R(3)) - flux_src_vf(momxb)%sf(j, k, l) = flux_src_vf(momxb)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 3) + vel_grad_R(1, 3)) - 0.5_wp*(vel_grad_L(3, 1) + vel_grad_R(3, 1)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 3)*vel_L(1) + vel_grad_R(1, 3)*vel_R(1)) - 0.5_wp*(vel_grad_L(3, 1)*vel_L(1) + vel_grad_R(3, 1)*vel_R(1)) + flux_src_vf(momxb)%sf(j, k, l) = flux_src_vf(momxb)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 3) + vel_grad_R(1, 3)) - 0.5_wp*(vel_grad_L(3, 1) + vel_grad_R(3, 1)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 3)*vel_L(1) + vel_grad_R(1, 3)*vel_R(1)) - 0.5_wp*(vel_grad_L(3, 1)*vel_L(1) + vel_grad_R(3, 1)*vel_R(1)) - flux_src_vf(momxb + 2)%sf(j, k, l) = flux_src_vf(momxb + 2)%sf(j, k, l) - (4._wp/3._wp)*0.5_wp*(vel_grad_L(3, 3) + vel_grad_R(3, 3)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - (4._wp/3._wp)*0.5_wp*(vel_grad_L(3, 3)*vel_L(3) + vel_grad_R(3, 3)*vel_R(3)) + flux_src_vf(momxb + 2)%sf(j, k, l) = flux_src_vf(momxb + 2)%sf(j, k, l) - (4._wp/3._wp)*0.5_wp*(vel_grad_L(3, 3) + vel_grad_R(3, 3)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - (4._wp/3._wp)*0.5_wp*(vel_grad_L(3, 3)*vel_L(3) + vel_grad_R(3, 3)*vel_R(3)) - flux_src_vf(momxb + 1)%sf(j, k, l) = flux_src_vf(momxb + 1)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 3) + vel_grad_R(2, 3)) - 0.5_wp*(vel_grad_L(3, 2) + vel_grad_R(3, 2)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 3)*vel_L(2) + vel_grad_R(2, 3)*vel_R(2)) - 0.5_wp*(vel_grad_L(3, 2)*vel_L(2) + vel_grad_R(3, 2)*vel_R(2)) + flux_src_vf(momxb + 1)%sf(j, k, l) = flux_src_vf(momxb + 1)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 3) + vel_grad_R(2, 3)) - 0.5_wp*(vel_grad_L(3, 2) + vel_grad_R(3, 2)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 3)*vel_L(2) + vel_grad_R(2, 3)*vel_R(2)) - 0.5_wp*(vel_grad_L(3, 2)*vel_L(2) + vel_grad_R(3, 2)*vel_R(2)) #:endif end if end if @@ -1838,16 +1838,16 @@ contains vel_grad_L(i, 1) = (dqL_prim_dx_vf(momxb + i - 1)%sf(j, k, l)/Re_L(2)) vel_grad_R(i, 1) = (dqR_prim_dx_vf(momxb + i - 1)%sf(idx_right_phys(1), idx_right_phys(2), idx_right_phys(3))/Re_R(2)) #:if not MFC_CASE_OPTIMIZATION or num_dims > 1 - if (num_dims > 1) then - vel_grad_L(i, 2) = (dqL_prim_dy_vf(momxb + i - 1)%sf(j, k, l)/Re_L(2)) - vel_grad_R(i, 2) = (dqR_prim_dy_vf(momxb + i - 1)%sf(idx_right_phys(1), idx_right_phys(2), idx_right_phys(3))/Re_R(2)) - end if + if (num_dims > 1) then + vel_grad_L(i, 2) = (dqL_prim_dy_vf(momxb + i - 1)%sf(j, k, l)/Re_L(2)) + vel_grad_R(i, 2) = (dqR_prim_dy_vf(momxb + i - 1)%sf(idx_right_phys(1), idx_right_phys(2), idx_right_phys(3))/Re_R(2)) + end if #:endif - #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 - if (num_dims > 2) then - vel_grad_L(i, 3) = (dqL_prim_dz_vf(momxb + i - 1)%sf(j, k, l)/Re_L(2)) - vel_grad_R(i, 3) = (dqR_prim_dz_vf(momxb + i - 1)%sf(idx_right_phys(1), idx_right_phys(2), idx_right_phys(3))/Re_R(2)) - end if + #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 + if (num_dims > 2) then + vel_grad_L(i, 3) = (dqL_prim_dz_vf(momxb + i - 1)%sf(j, k, l)/Re_L(2)) + vel_grad_R(i, 3) = (dqR_prim_dz_vf(momxb + i - 1)%sf(idx_right_phys(1), idx_right_phys(2), idx_right_phys(3))/Re_R(2)) + end if #:endif end do @@ -1855,44 +1855,44 @@ contains flux_src_vf(momxb)%sf(j, k, l) = flux_src_vf(momxb)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 1) + vel_grad_R(1, 1)) flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 1)*vel_L(1) + vel_grad_R(1, 1)*vel_R(1)) #:if not MFC_CASE_OPTIMIZATION or num_dims > 1 - if (num_dims > 1) then - flux_src_vf(momxb)%sf(j, k, l) = flux_src_vf(momxb)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 2) + vel_grad_R(2, 2)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 2)*vel_L(1) + vel_grad_R(2, 2)*vel_R(1)) - - #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 - if (num_dims > 2) then - flux_src_vf(momxb)%sf(j, k, l) = flux_src_vf(momxb)%sf(j, k, l) - 0.5_wp*(vel_grad_L(3, 3) + vel_grad_R(3, 3)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(3, 3)*vel_L(1) + vel_grad_R(3, 3)*vel_R(1)) + if (num_dims > 1) then + flux_src_vf(momxb)%sf(j, k, l) = flux_src_vf(momxb)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 2) + vel_grad_R(2, 2)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 2)*vel_L(1) + vel_grad_R(2, 2)*vel_R(1)) + + #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 + if (num_dims > 2) then + flux_src_vf(momxb)%sf(j, k, l) = flux_src_vf(momxb)%sf(j, k, l) - 0.5_wp*(vel_grad_L(3, 3) + vel_grad_R(3, 3)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(3, 3)*vel_L(1) + vel_grad_R(3, 3)*vel_R(1)) + end if + #:endif end if - #:endif - end if #:endif else if (norm_dir == 2) then #:if not MFC_CASE_OPTIMIZATION or num_dims > 1 - flux_src_vf(momxb + 1)%sf(j, k, l) = flux_src_vf(momxb + 1)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 1) + vel_grad_R(1, 1)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 1)*vel_L(2) + vel_grad_R(1, 1)*vel_R(2)) + flux_src_vf(momxb + 1)%sf(j, k, l) = flux_src_vf(momxb + 1)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 1) + vel_grad_R(1, 1)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 1)*vel_L(2) + vel_grad_R(1, 1)*vel_R(2)) - flux_src_vf(momxb + 1)%sf(j, k, l) = flux_src_vf(momxb + 1)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 2) + vel_grad_R(2, 2)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 2)*vel_L(2) + vel_grad_R(2, 2)*vel_R(2)) + flux_src_vf(momxb + 1)%sf(j, k, l) = flux_src_vf(momxb + 1)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 2) + vel_grad_R(2, 2)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 2)*vel_L(2) + vel_grad_R(2, 2)*vel_R(2)) - #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 - if (num_dims > 2) then - flux_src_vf(momxb + 1)%sf(j, k, l) = flux_src_vf(momxb + 1)%sf(j, k, l) - 0.5_wp*(vel_grad_L(3, 3) + vel_grad_R(3, 3)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(3, 3)*vel_L(2) + vel_grad_R(3, 3)*vel_R(2)) - end if - #:endif + #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 + if (num_dims > 2) then + flux_src_vf(momxb + 1)%sf(j, k, l) = flux_src_vf(momxb + 1)%sf(j, k, l) - 0.5_wp*(vel_grad_L(3, 3) + vel_grad_R(3, 3)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(3, 3)*vel_L(2) + vel_grad_R(3, 3)*vel_R(2)) + end if + #:endif #:endif else #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 - flux_src_vf(momxb + 2)%sf(j, k, l) = flux_src_vf(momxb + 2)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 1) + vel_grad_R(1, 1)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 1)*vel_L(3) + vel_grad_R(1, 1)*vel_R(3)) + flux_src_vf(momxb + 2)%sf(j, k, l) = flux_src_vf(momxb + 2)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 1) + vel_grad_R(1, 1)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(1, 1)*vel_L(3) + vel_grad_R(1, 1)*vel_R(3)) - flux_src_vf(momxb + 2)%sf(j, k, l) = flux_src_vf(momxb + 2)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 2) + vel_grad_R(2, 2)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 2)*vel_L(3) + vel_grad_R(2, 2)*vel_R(3)) + flux_src_vf(momxb + 2)%sf(j, k, l) = flux_src_vf(momxb + 2)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 2) + vel_grad_R(2, 2)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(2, 2)*vel_L(3) + vel_grad_R(2, 2)*vel_R(3)) - flux_src_vf(momxb + 2)%sf(j, k, l) = flux_src_vf(momxb + 2)%sf(j, k, l) - 0.5_wp*(vel_grad_L(3, 3) + vel_grad_R(3, 3)) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(3, 3)*vel_L(3) + vel_grad_R(3, 3)*vel_R(3)) + flux_src_vf(momxb + 2)%sf(j, k, l) = flux_src_vf(momxb + 2)%sf(j, k, l) - 0.5_wp*(vel_grad_L(3, 3) + vel_grad_R(3, 3)) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) - 0.5_wp*(vel_grad_L(3, 3)*vel_L(3) + vel_grad_R(3, 3)*vel_R(3)) #:endif end if @@ -1972,27 +1972,27 @@ contains integer, intent(in) :: norm_dir type(int_bounds_info), intent(in) :: ix, iy, iz -#:if not MFC_CASE_OPTIMIZATION and USING_AMD - real(wp), dimension(3) :: alpha_rho_L, alpha_rho_R - real(wp), dimension(3) :: alpha_L, alpha_R - real(wp), dimension(3) :: vel_L, vel_R -#:else - real(wp), dimension(num_fluids) :: alpha_rho_L, alpha_rho_R - real(wp), dimension(num_fluids) :: alpha_L, alpha_R - real(wp), dimension(num_dims) :: vel_L, vel_R -#:endif - - real(wp) :: rho_L, rho_R + #:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(3) :: alpha_rho_L, alpha_rho_R + real(wp), dimension(3) :: alpha_L, alpha_R + real(wp), dimension(3) :: vel_L, vel_R + #:else + real(wp), dimension(num_fluids) :: alpha_rho_L, alpha_rho_R + real(wp), dimension(num_fluids) :: alpha_L, alpha_R + real(wp), dimension(num_dims) :: vel_L, vel_R + #:endif + + real(wp) :: rho_L, rho_R real(wp) :: pres_L, pres_R real(wp) :: E_L, E_R real(wp) :: H_L, H_R -#:if not MFC_CASE_OPTIMIZATION and USING_AMD - real(wp), dimension(10) :: Ys_L, Ys_R, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Cp_iL, Cp_iR - real(wp), dimension(10) :: Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2 -#:else - real(wp), dimension(num_species) :: Ys_L, Ys_R, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Cp_iL, Cp_iR - real(wp), dimension(num_species) :: Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2 -#:endif + #:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(10) :: Ys_L, Ys_R, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Cp_iL, Cp_iR + real(wp), dimension(10) :: Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2 + #:else + real(wp), dimension(num_species) :: Ys_L, Ys_R, Xs_L, Xs_R, Gamma_iL, Gamma_iR, Cp_iL, Cp_iR + real(wp), dimension(num_species) :: Yi_avg, Phi_avg, h_iL, h_iR, h_avg_2 + #:endif real(wp) :: Cp_avg, Cv_avg, T_avg, c_sum_Yi_Phi, eps real(wp) :: T_L, T_R real(wp) :: MW_L, MW_R @@ -2017,17 +2017,17 @@ contains real(wp) :: xi_L, xi_R !< Left and right wave speeds functions real(wp) :: xi_M, xi_P real(wp) :: xi_MP, xi_PP -#:if not MFC_CASE_OPTIMIZATION and USING_AMD - real(wp), dimension(3) :: R0_L, R0_R - real(wp), dimension(3) :: V0_L, V0_R - real(wp), dimension(3) :: P0_L, P0_R - real(wp), dimension(3) :: pbw_L, pbw_R -#:else - real(wp), dimension(nb) :: R0_L, R0_R - real(wp), dimension(nb) :: V0_L, V0_R - real(wp), dimension(nb) :: P0_L, P0_R - real(wp), dimension(nb) :: pbw_L, pbw_R -#:endif + #:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(3) :: R0_L, R0_R + real(wp), dimension(3) :: V0_L, V0_R + real(wp), dimension(3) :: P0_L, P0_R + real(wp), dimension(3) :: pbw_L, pbw_R + #:else + real(wp), dimension(nb) :: R0_L, R0_R + real(wp), dimension(nb) :: V0_L, V0_R + real(wp), dimension(nb) :: P0_L, P0_R + real(wp), dimension(nb) :: pbw_L, pbw_R + #:endif real(wp) :: alpha_L_sum, alpha_R_sum, nbub_L, nbub_R real(wp) :: ptilde_L, ptilde_R @@ -2037,11 +2037,11 @@ contains real(wp) :: R3V2Lbar, R3V2Rbar real(wp), dimension(6) :: tau_e_L, tau_e_R -#:if not MFC_CASE_OPTIMIZATION and USING_AMD - real(wp), dimension(3) :: xi_field_L, xi_field_R -#:else - real(wp), dimension(num_dims) :: xi_field_L, xi_field_R -#:endif + #:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(3) :: xi_field_L, xi_field_R + #:else + real(wp), dimension(num_dims) :: xi_field_L, xi_field_R + #:endif real(wp) :: G_L, G_R real(wp) :: vel_L_rms, vel_R_rms, vel_avg_rms @@ -3262,13 +3262,13 @@ contains call get_mixture_molecular_weight(Ys_L, MW_L) call get_mixture_molecular_weight(Ys_R, MW_R) -#:if USING_AMD - Xs_L(:) = Ys_L(:)*MW_L/molecular_weights_nonparameter(:) - Xs_R(:) = Ys_R(:)*MW_R/molecular_weights_nonparameter(:) -#:else - Xs_L(:) = Ys_L(:)*MW_L/molecular_weights(:) - Xs_R(:) = Ys_R(:)*MW_R/molecular_weights(:) -#:endif + #:if USING_AMD + Xs_L(:) = Ys_L(:)*MW_L/molecular_weights_nonparameter(:) + Xs_R(:) = Ys_R(:)*MW_R/molecular_weights_nonparameter(:) + #:else + Xs_L(:) = Ys_L(:)*MW_L/molecular_weights(:) + Xs_R(:) = Ys_R(:)*MW_R/molecular_weights(:) + #:endif R_gas_L = gas_constant/MW_L R_gas_R = gas_constant/MW_R @@ -3720,11 +3720,11 @@ contains type(int_bounds_info), intent(in) :: ix, iy, iz ! Local variables: -#:if not MFC_CASE_OPTIMIZATION and USING_AMD - real(wp), dimension(3) :: alpha_L, alpha_R, alpha_rho_L, alpha_rho_R -#:else - real(wp), dimension(num_fluids) :: alpha_L, alpha_R, alpha_rho_L, alpha_rho_R -#:endif + #:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(3) :: alpha_L, alpha_R, alpha_rho_L, alpha_rho_R + #:else + real(wp), dimension(num_fluids) :: alpha_L, alpha_R, alpha_rho_L, alpha_rho_R + #:endif type(riemann_states_vec3) :: vel type(riemann_states) :: rho, pres, E, H_no_mag type(riemann_states) :: gamma, pi_inf, qv @@ -4682,21 +4682,21 @@ contains type(int_bounds_info), intent(in) :: ix, iy, iz ! Local variables -#:if not MFC_CASE_OPTIMIZATION and USING_AMD - real(wp), dimension(3) :: avg_v_int !!< Averaged interface velocity $(v_x, v_y, v_z)$ (grid directions). - real(wp), dimension(3) :: avg_dvdx_int !!< Averaged interface $\partial v_i/\partial x$ (grid dir 1). - real(wp), dimension(3) :: avg_dvdy_int !!< Averaged interface $\partial v_i/\partial y$ (grid dir 2). - real(wp), dimension(3) :: avg_dvdz_int !!< Averaged interface $\partial v_i/\partial z$ (grid dir 3). - real(wp), dimension(3) :: vel_src_int !!< Interface velocity $(v_1,v_2,v_3)$ (grid directions) for viscous work. - real(wp), dimension(3) :: stress_vector_shear !!< Shear stress vector $(\sigma_{N1}, \sigma_{N2}, \sigma_{N3})$ on N-face (grid directions). -#:else - real(wp), dimension(num_dims) :: avg_v_int !!< Averaged interface velocity $(v_x, v_y, v_z)$ (grid directions). - real(wp), dimension(num_dims) :: avg_dvdx_int !!< Averaged interface $\partial v_i/\partial x$ (grid dir 1). - real(wp), dimension(num_dims) :: avg_dvdy_int !!< Averaged interface $\partial v_i/\partial y$ (grid dir 2). - real(wp), dimension(num_dims) :: avg_dvdz_int !!< Averaged interface $\partial v_i/\partial z$ (grid dir 3). - real(wp), dimension(num_dims) :: vel_src_int !!< Interface velocity $(v_1,v_2,v_3)$ (grid directions) for viscous work. - real(wp), dimension(num_dims) :: stress_vector_shear !!< Shear stress vector $(\sigma_{N1}, \sigma_{N2}, \sigma_{N3})$ on N-face (grid directions). -#:endif + #:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(3) :: avg_v_int !!< Averaged interface velocity $(v_x, v_y, v_z)$ (grid directions). + real(wp), dimension(3) :: avg_dvdx_int !!< Averaged interface $\partial v_i/\partial x$ (grid dir 1). + real(wp), dimension(3) :: avg_dvdy_int !!< Averaged interface $\partial v_i/\partial y$ (grid dir 2). + real(wp), dimension(3) :: avg_dvdz_int !!< Averaged interface $\partial v_i/\partial z$ (grid dir 3). + real(wp), dimension(3) :: vel_src_int !!< Interface velocity $(v_1,v_2,v_3)$ (grid directions) for viscous work. + real(wp), dimension(3) :: stress_vector_shear !!< Shear stress vector $(\sigma_{N1}, \sigma_{N2}, \sigma_{N3})$ on N-face (grid directions). + #:else + real(wp), dimension(num_dims) :: avg_v_int !!< Averaged interface velocity $(v_x, v_y, v_z)$ (grid directions). + real(wp), dimension(num_dims) :: avg_dvdx_int !!< Averaged interface $\partial v_i/\partial x$ (grid dir 1). + real(wp), dimension(num_dims) :: avg_dvdy_int !!< Averaged interface $\partial v_i/\partial y$ (grid dir 2). + real(wp), dimension(num_dims) :: avg_dvdz_int !!< Averaged interface $\partial v_i/\partial z$ (grid dir 3). + real(wp), dimension(num_dims) :: vel_src_int !!< Interface velocity $(v_1,v_2,v_3)$ (grid directions) for viscous work. + real(wp), dimension(num_dims) :: stress_vector_shear !!< Shear stress vector $(\sigma_{N1}, \sigma_{N2}, \sigma_{N3})$ on N-face (grid directions). + #:endif real(wp) :: stress_normal_bulk !!< Normal bulk stress component $\sigma_{NN}$ on N-face. real(wp) :: Re_s, Re_b !!< Effective interface shear and bulk Reynolds numbers. @@ -4760,12 +4760,12 @@ contains ! Divergence in cylindrical coordinates (vx=vz_cyl, vy=vr_cyl, vz=vtheta_cyl) #:if not MFC_CASE_OPTIMIZATION or num_dims > 1 - divergence_cyl = avg_dvdx_int(1) + avg_dvdy_int(2) + avg_v_int(2)/r_eff - if (num_dims > 2) then - #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 - divergence_cyl = divergence_cyl + avg_dvdz_int(3)/r_eff - #:endif - end if + divergence_cyl = avg_dvdx_int(1) + avg_dvdy_int(2) + avg_v_int(2)/r_eff + if (num_dims > 2) then + #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 + divergence_cyl = divergence_cyl + avg_dvdz_int(3)/r_eff + #:endif + end if #:endif stress_vector_shear = 0.0_wp @@ -4779,7 +4779,7 @@ contains stress_vector_shear(1) = (2.0_wp*avg_dvdx_int(1))/Re_s + div_v_term_const if (num_dims > 1) then #:if not MFC_CASE_OPTIMIZATION or num_dims > 1 - stress_vector_shear(2) = (avg_dvdy_int(1) + avg_dvdx_int(2))/Re_s + stress_vector_shear(2) = (avg_dvdy_int(1) + avg_dvdx_int(2))/Re_s #:endif end if if (num_dims > 2) then @@ -4790,13 +4790,13 @@ contains case (2) ! Y-face (radial normal, r_cyl) if (num_dims > 1) then #:if not MFC_CASE_OPTIMIZATION or num_dims > 1 - stress_vector_shear(1) = (avg_dvdy_int(1) + avg_dvdx_int(2))/Re_s - stress_vector_shear(2) = (2.0_wp*avg_dvdy_int(2))/Re_s + div_v_term_const - if (num_dims > 2) then - #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 - stress_vector_shear(3) = (avg_dvdz_int(2)/r_eff - avg_v_int(3)/r_eff + avg_dvdy_int(3))/Re_s - #:endif - end if + stress_vector_shear(1) = (avg_dvdy_int(1) + avg_dvdx_int(2))/Re_s + stress_vector_shear(2) = (2.0_wp*avg_dvdy_int(2))/Re_s + div_v_term_const + if (num_dims > 2) then + #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 + stress_vector_shear(3) = (avg_dvdz_int(2)/r_eff - avg_v_int(3)/r_eff + avg_dvdy_int(3))/Re_s + #:endif + end if #:endif else stress_vector_shear(1) = (2.0_wp*avg_dvdx_int(1))/Re_s + div_v_term_const @@ -4865,17 +4865,17 @@ contains integer, intent(in) :: norm_dir ! Local variables -#:if not MFC_CASE_OPTIMIZATION and USING_AMD - real(wp), dimension(3, 3) :: vel_grad_avg !< Averaged velocity gradient tensor `d(vel_i)/d(coord_j)`. - real(wp), dimension(3, 3) :: current_tau_shear !< Current shear stress tensor. - real(wp), dimension(3, 3) :: current_tau_bulk !< Current bulk stress tensor. - real(wp), dimension(3) :: vel_src_at_interface !< Interface velocities (u,v,w) for viscous work. -#:else - real(wp), dimension(num_dims, num_dims) :: vel_grad_avg !< Averaged velocity gradient tensor `d(vel_i)/d(coord_j)`. - real(wp), dimension(num_dims, num_dims) :: current_tau_shear !< Current shear stress tensor. - real(wp), dimension(num_dims, num_dims) :: current_tau_bulk !< Current bulk stress tensor. - real(wp), dimension(num_dims) :: vel_src_at_interface !< Interface velocities (u,v,w) for viscous work. -#:endif + #:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(3, 3) :: vel_grad_avg !< Averaged velocity gradient tensor `d(vel_i)/d(coord_j)`. + real(wp), dimension(3, 3) :: current_tau_shear !< Current shear stress tensor. + real(wp), dimension(3, 3) :: current_tau_bulk !< Current bulk stress tensor. + real(wp), dimension(3) :: vel_src_at_interface !< Interface velocities (u,v,w) for viscous work. + #:else + real(wp), dimension(num_dims, num_dims) :: vel_grad_avg !< Averaged velocity gradient tensor `d(vel_i)/d(coord_j)`. + real(wp), dimension(num_dims, num_dims) :: current_tau_shear !< Current shear stress tensor. + real(wp), dimension(num_dims, num_dims) :: current_tau_bulk !< Current bulk stress tensor. + real(wp), dimension(num_dims) :: vel_src_at_interface !< Interface velocities (u,v,w) for viscous work. + #:endif integer, dimension(3) :: idx_right_phys !< Physical (j,k,l) indices for right state. real(wp) :: Re_shear !< Interface shear Reynolds number. @@ -4905,8 +4905,8 @@ contains dvelR_dx_vf(vel_comp_idx)%sf(idx_right_phys(1), idx_right_phys(2), idx_right_phys(3))) if (num_dims > 1) then #:if not MFC_CASE_OPTIMIZATION or num_dims > 1 - vel_grad_avg(vel_comp_idx, 2) = 0.5_wp*(dvelL_dy_vf(vel_comp_idx)%sf(j_loop, k_loop, l_loop) + & - dvelR_dy_vf(vel_comp_idx)%sf(idx_right_phys(1), idx_right_phys(2), idx_right_phys(3))) + vel_grad_avg(vel_comp_idx, 2) = 0.5_wp*(dvelL_dy_vf(vel_comp_idx)%sf(j_loop, k_loop, l_loop) + & + dvelR_dy_vf(vel_comp_idx)%sf(idx_right_phys(1), idx_right_phys(2), idx_right_phys(3))) #:endif end if if (num_dims > 2) then @@ -4988,13 +4988,13 @@ contains $:GPU_ROUTINE(parallelism='[seq]') ! Arguments -#:if not MFC_CASE_OPTIMIZATION and USING_AMD - real(wp), dimension(3, 3), intent(in) :: vel_grad_avg - real(wp), dimension(3, 3), intent(out) :: tau_shear_out -#:else - real(wp), dimension(num_dims, num_dims), intent(in) :: vel_grad_avg - real(wp), dimension(num_dims, num_dims), intent(out) :: tau_shear_out -#:endif + #:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(3, 3), intent(in) :: vel_grad_avg + real(wp), dimension(3, 3), intent(out) :: tau_shear_out + #:else + real(wp), dimension(num_dims, num_dims), intent(in) :: vel_grad_avg + real(wp), dimension(num_dims, num_dims), intent(out) :: tau_shear_out + #:endif real(wp), intent(in) :: Re_shear real(wp), intent(in) :: divergence_v @@ -5027,11 +5027,11 @@ contains ! Arguments real(wp), intent(in) :: Re_bulk real(wp), intent(in) :: divergence_v -#:if not MFC_CASE_OPTIMIZATION and USING_AMD - real(wp), dimension(3, 3), intent(out) :: tau_bulk_out -#:else - real(wp), dimension(num_dims, num_dims), intent(out) :: tau_bulk_out -#:endif + #:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(3, 3), intent(out) :: tau_bulk_out + #:else + real(wp), dimension(num_dims, num_dims), intent(out) :: tau_bulk_out + #:endif ! Local variables integer :: i_dim !< Loop iterator for diagonal components. diff --git a/src/simulation/m_sim_helpers.fpp b/src/simulation/m_sim_helpers.fpp index 64f9fd427d..f5d75aa148 100644 --- a/src/simulation/m_sim_helpers.fpp +++ b/src/simulation/m_sim_helpers.fpp @@ -97,22 +97,22 @@ contains & cray_inline=True) type(scalar_field), intent(in), dimension(sys_size) :: q_prim_vf -#:if not MFC_CASE_OPTIMIZATION and USING_AMD - real(wp), intent(inout), dimension(3) :: alpha - real(wp), intent(inout), dimension(3) :: vel -#:else - real(wp), intent(inout), dimension(num_fluids) :: alpha - real(wp), intent(inout), dimension(num_vels) :: vel -#:endif + #:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), intent(inout), dimension(3) :: alpha + real(wp), intent(inout), dimension(3) :: vel + #:else + real(wp), intent(inout), dimension(num_fluids) :: alpha + real(wp), intent(inout), dimension(num_vels) :: vel + #:endif real(wp), intent(inout) :: rho, gamma, pi_inf, vel_sum, H, pres real(wp), intent(out) :: qv integer, intent(in) :: j, k, l real(wp), dimension(2), intent(inout) :: Re -#:if not MFC_CASE_OPTIMIZATION and USING_AMD - real(wp), dimension(3) :: alpha_rho, Gs -#:else - real(wp), dimension(num_fluids) :: alpha_rho, Gs -#:endif + #:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(3) :: alpha_rho, Gs + #:else + real(wp), dimension(num_fluids) :: alpha_rho, Gs + #:endif real(wp) :: E, G_local integer :: i diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp index 86284622c6..d46a3e0793 100644 --- a/src/simulation/m_start_up.fpp +++ b/src/simulation/m_start_up.fpp @@ -1375,8 +1375,8 @@ contains ! the modules. The preparations below DO DEPEND on the grid being complete. if (igr .or. dummy) then call s_initialize_igr_module() - end if - if(.not. igr .or. dummy) then + end if + if (.not. igr .or. dummy) then if (recon_type == WENO_TYPE) then call s_initialize_weno_module() elseif (recon_type == MUSCL_TYPE) then diff --git a/src/simulation/m_surface_tension.fpp b/src/simulation/m_surface_tension.fpp index 3afa1bd4b2..99bac7d722 100644 --- a/src/simulation/m_surface_tension.fpp +++ b/src/simulation/m_surface_tension.fpp @@ -82,11 +82,11 @@ contains intent(inout) :: flux_src_vf integer, intent(in) :: id type(int_bounds_info), intent(in) :: isx, isy, isz -#:if not MFC_CASE_OPTIMIZATION and USING_AMD - real(wp), dimension(3, 3) :: Omega -#:else - real(wp), dimension(num_dims, num_dims) :: Omega -#:endif + #:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(3, 3) :: Omega + #:else + real(wp), dimension(num_dims, num_dims) :: Omega + #:endif real(wp) :: w1L, w1R, w2L, w2R, w3L, w3R, w1, w2, w3 real(wp) :: normWL, normWR, normW integer :: j, k, l, i @@ -138,49 +138,49 @@ contains elseif (id == 2) then #:if not MFC_CASE_OPTIMIZATION or num_dims > 1 - $:GPU_PARALLEL_LOOP(collapse=3, private='[Omega, w1L, w2L, w3L, w1R, w2R, w3R, w1, w2, w3, normWL, normWR, normW]') - do l = isz%beg, isz%end - do k = isy%beg, isy%end - do j = isx%beg, isx%end + $:GPU_PARALLEL_LOOP(collapse=3, private='[Omega, w1L, w2L, w3L, w1R, w2R, w3R, w1, w2, w3, normWL, normWR, normW]') + do l = isz%beg, isz%end + do k = isy%beg, isy%end + do j = isx%beg, isx%end - w1L = gL_y(k, j, l, 1) - w2L = gL_y(k, j, l, 2) - w3L = 0._wp - if (p > 0) w3L = gL_y(k, j, l, 3) + w1L = gL_y(k, j, l, 1) + w2L = gL_y(k, j, l, 2) + w3L = 0._wp + if (p > 0) w3L = gL_y(k, j, l, 3) - w1R = gR_y(k + 1, j, l, 1) - w2R = gR_y(k + 1, j, l, 2) - w3R = 0._wp - if (p > 0) w3R = gR_y(k + 1, j, l, 3) + w1R = gR_y(k + 1, j, l, 1) + w2R = gR_y(k + 1, j, l, 2) + w3R = 0._wp + if (p > 0) w3R = gR_y(k + 1, j, l, 3) - normWL = gL_y(k, j, l, num_dims + 1) - normWR = gR_y(k + 1, j, l, num_dims + 1) + normWL = gL_y(k, j, l, num_dims + 1) + normWR = gR_y(k + 1, j, l, num_dims + 1) - w1 = (w1L + w1R)/2._wp - w2 = (w2L + w2R)/2._wp - w3 = (w3L + w3R)/2._wp - normW = (normWL + normWR)/2._wp + w1 = (w1L + w1R)/2._wp + w2 = (w2L + w2R)/2._wp + w3 = (w3L + w3R)/2._wp + normW = (normWL + normWR)/2._wp - if (normW > capillary_cutoff) then - @:compute_capillary_stress_tensor() + if (normW > capillary_cutoff) then + @:compute_capillary_stress_tensor() - do i = 1, num_dims + do i = 1, num_dims - flux_src_vf(momxb + i - 1)%sf(j, k, l) = & - flux_src_vf(momxb + i - 1)%sf(j, k, l) + Omega(2, i) + flux_src_vf(momxb + i - 1)%sf(j, k, l) = & + flux_src_vf(momxb + i - 1)%sf(j, k, l) + Omega(2, i) - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) + & - Omega(2, i)*vSrc_rsy_vf(k, j, l, i) + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) + & + Omega(2, i)*vSrc_rsy_vf(k, j, l, i) - end do + end do - flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) + & - sigma*c_divs(num_dims + 1)%sf(j, k, l)*vSrc_rsy_vf(k, j, l, 2) - end if + flux_src_vf(E_idx)%sf(j, k, l) = flux_src_vf(E_idx)%sf(j, k, l) + & + sigma*c_divs(num_dims + 1)%sf(j, k, l)*vSrc_rsy_vf(k, j, l, 2) + end if + end do end do end do - end do - $:END_GPU_PARALLEL_LOOP() + $:END_GPU_PARALLEL_LOOP() #:endif elseif (id == 3) then diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index c34792f554..3d1d0b4a2f 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -464,16 +464,16 @@ contains @:ALLOCATE(bc_type(1,1)%sf(0:0,0:n,0:p)) @:ALLOCATE(bc_type(1,2)%sf(0:0,0:n,0:p)) #:if not MFC_CASE_OPTIMIZATION or num_dims > 1 - if (n > 0) then - @:ALLOCATE(bc_type(2,1)%sf(-buff_size:m+buff_size,0:0,0:p)) - @:ALLOCATE(bc_type(2,2)%sf(-buff_size:m+buff_size,0:0,0:p)) - #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 - if (p > 0) then - @:ALLOCATE(bc_type(3,1)%sf(-buff_size:m+buff_size,-buff_size:n+buff_size,0:0)) - @:ALLOCATE(bc_type(3,2)%sf(-buff_size:m+buff_size,-buff_size:n+buff_size,0:0)) + if (n > 0) then + @:ALLOCATE(bc_type(2,1)%sf(-buff_size:m+buff_size,0:0,0:p)) + @:ALLOCATE(bc_type(2,2)%sf(-buff_size:m+buff_size,0:0,0:p)) + #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 + if (p > 0) then + @:ALLOCATE(bc_type(3,1)%sf(-buff_size:m+buff_size,-buff_size:n+buff_size,0:0)) + @:ALLOCATE(bc_type(3,2)%sf(-buff_size:m+buff_size,-buff_size:n+buff_size,0:0)) + end if + #:endif end if - #:endif - end if #:endif do i = 1, num_dims @@ -532,8 +532,8 @@ contains if (run_time_info) then if (igr .or. dummy) then call s_write_run_time_information(q_cons_ts(1)%vf, t_step) - end if - if(.not. igr .or. dummy) then + end if + if (.not. igr .or. dummy) then call s_write_run_time_information(q_prim_vf, t_step) end if end if @@ -722,15 +722,15 @@ contains impure subroutine s_compute_dt() real(wp) :: rho !< Cell-avg. density -#:if not MFC_CASE_OPTIMIZATION and USING_AMD - real(wp), dimension(3) :: vel !< Cell-avg. velocity - real(wp), dimension(3) :: alpha !< Cell-avg. volume fraction -#:else - real(wp), dimension(num_vels) :: vel !< Cell-avg. velocity - real(wp), dimension(num_fluids) :: alpha !< Cell-avg. volume fraction -#:endif + #:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(3) :: vel !< Cell-avg. velocity + real(wp), dimension(3) :: alpha !< Cell-avg. volume fraction + #:else + real(wp), dimension(num_vels) :: vel !< Cell-avg. velocity + real(wp), dimension(num_fluids) :: alpha !< Cell-avg. volume fraction + #:endif real(wp) :: vel_sum !< Cell-avg. velocity sum - real(wp) :: pres !< Cell-avg. pressure + real(wp) :: pres !< Cell-avg. pressure real(wp) :: gamma !< Cell-avg. sp. heat ratio real(wp) :: pi_inf !< Cell-avg. liquid stiffness function real(wp) :: qv !< Cell-avg. fluid reference energy diff --git a/src/simulation/m_viscous.fpp b/src/simulation/m_viscous.fpp index 3cd6099539..e81a243373 100644 --- a/src/simulation/m_viscous.fpp +++ b/src/simulation/m_viscous.fpp @@ -72,13 +72,13 @@ contains real(wp) :: rho_visc, gamma_visc, pi_inf_visc, alpha_visc_sum !< Mixture variables real(wp), dimension(2) :: Re_visc -#:if not MFC_CASE_OPTIMIZATION and USING_AMD - real(wp), dimension(3) :: alpha_visc, alpha_rho_visc - real(wp), dimension(3, 3) :: tau_Re -#:else - real(wp), dimension(num_fluids) :: alpha_visc, alpha_rho_visc - real(wp), dimension(num_dims, num_dims) :: tau_Re -#:endif + #:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(3) :: alpha_visc, alpha_rho_visc + real(wp), dimension(3, 3) :: tau_Re + #:else + real(wp), dimension(num_fluids) :: alpha_visc, alpha_rho_visc + real(wp), dimension(num_dims, num_dims) :: tau_Re + #:endif integer :: i, j, k, l, q !< Generic loop iterator diff --git a/src/simulation/m_weno.fpp b/src/simulation/m_weno.fpp index ed1f5576ae..32bd2a2d6e 100644 --- a/src/simulation/m_weno.fpp +++ b/src/simulation/m_weno.fpp @@ -647,21 +647,21 @@ contains integer, intent(in) :: weno_dir type(int_bounds_info), intent(in) :: is1_weno_d, is2_weno_d, is3_weno_d -#:if not MFC_CASE_OPTIMIZATION and USING_AMD - real(wp), dimension(-3:2) :: dvd - real(wp), dimension(0:4) :: poly - real(wp), dimension(0:4) :: alpha - real(wp), dimension(0:4) :: omega - real(wp), dimension(0:4) :: beta - real(wp), dimension(0:4) :: delta -#:else - real(wp), dimension(-weno_polyn:weno_polyn - 1) :: dvd - real(wp), dimension(0:weno_num_stencils) :: poly - real(wp), dimension(0:weno_num_stencils) :: alpha - real(wp), dimension(0:weno_num_stencils) :: omega - real(wp), dimension(0:weno_num_stencils) :: beta - real(wp), dimension(0:weno_num_stencils) :: delta -#:endif + #:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(-3:2) :: dvd + real(wp), dimension(0:4) :: poly + real(wp), dimension(0:4) :: alpha + real(wp), dimension(0:4) :: omega + real(wp), dimension(0:4) :: beta + real(wp), dimension(0:4) :: delta + #:else + real(wp), dimension(-weno_polyn:weno_polyn - 1) :: dvd + real(wp), dimension(0:weno_num_stencils) :: poly + real(wp), dimension(0:weno_num_stencils) :: alpha + real(wp), dimension(0:weno_num_stencils) :: omega + real(wp), dimension(0:weno_num_stencils) :: beta + real(wp), dimension(0:weno_num_stencils) :: delta + #:endif real(wp), dimension(-3:3) :: v ! temporary field value array for clarity (WENO7 only) real(wp) :: tau @@ -730,8 +730,8 @@ contains do i = 1, v_size ! reconstruct from left side - alpha(:) = 0._wp - omega(:) = 0._wp + alpha(:) = 0._wp + omega(:) = 0._wp beta(:) = weno_eps dvd(0) = v_rs_ws_${XYZ}$ (j + 1, k, l, i) & @@ -756,7 +756,7 @@ contains alpha(0:weno_num_stencils) = d_cbL_${XYZ}$ (0:weno_num_stencils, j)/(beta(0:weno_num_stencils)**2._wp) omega = alpha/sum(alpha) alpha(0:weno_num_stencils) = (d_cbL_${XYZ}$ (0:weno_num_stencils, j)*(1._wp + d_cbL_${XYZ}$ (0:weno_num_stencils, j) - 3._wp*omega(0:weno_num_stencils)) + omega(0:weno_num_stencils)**2._wp) & - *(omega(0:weno_num_stencils)/(d_cbL_${XYZ}$ (0:weno_num_stencils, j)**2._wp + omega(0:weno_num_stencils)*(1._wp - 2._wp*d_cbL_${XYZ}$ (0:weno_num_stencils, j)))) + *(omega(0:weno_num_stencils)/(d_cbL_${XYZ}$ (0:weno_num_stencils, j)**2._wp + omega(0:weno_num_stencils)*(1._wp - 2._wp*d_cbL_${XYZ}$ (0:weno_num_stencils, j)))) elseif (wenoz) then ! Borges, et al. (2008) @@ -784,7 +784,7 @@ contains alpha(0:weno_num_stencils) = d_cbR_${XYZ}$ (0:weno_num_stencils, j)/(beta(0:weno_num_stencils)**2._wp) omega = alpha/sum(alpha) alpha(0:weno_num_stencils) = (d_cbR_${XYZ}$ (0:weno_num_stencils, j)*(1._wp + d_cbR_${XYZ}$ (0:weno_num_stencils, j) - 3._wp*omega(0:weno_num_stencils)) + omega(0:weno_num_stencils)**2._wp) & - *(omega(0:weno_num_stencils)/(d_cbR_${XYZ}$ (0:weno_num_stencils, j)**2._wp + omega(0:weno_num_stencils)*(1._wp - 2._wp*d_cbR_${XYZ}$ (0:weno_num_stencils, j)))) + *(omega(0:weno_num_stencils)/(d_cbR_${XYZ}$ (0:weno_num_stencils, j)**2._wp + omega(0:weno_num_stencils)*(1._wp - 2._wp*d_cbR_${XYZ}$ (0:weno_num_stencils, j)))) elseif (wenoz) then @@ -816,8 +816,8 @@ contains do i = 1, v_size ! reconstruct from left side - alpha(:) = 0._wp - omega(:) = 0._wp + alpha(:) = 0._wp + omega(:) = 0._wp delta(:) = 0._wp beta(:) = weno_eps @@ -860,7 +860,7 @@ contains alpha(0:weno_num_stencils) = d_cbL_${XYZ}$ (0:weno_num_stencils, j)/(beta(0:weno_num_stencils)**2._wp) omega = alpha/sum(alpha) alpha(0:weno_num_stencils) = (d_cbL_${XYZ}$ (0:weno_num_stencils, j)*(1._wp + d_cbL_${XYZ}$ (0:weno_num_stencils, j) - 3._wp*omega(0:weno_num_stencils)) + omega(0:weno_num_stencils)**2._wp) & - *(omega(0:weno_num_stencils)/(d_cbL_${XYZ}$ (0:weno_num_stencils, j)**2._wp + omega(0:weno_num_stencils)*(1._wp - 2._wp*d_cbL_${XYZ}$ (0:weno_num_stencils, j)))) + *(omega(0:weno_num_stencils)/(d_cbL_${XYZ}$ (0:weno_num_stencils, j)**2._wp + omega(0:weno_num_stencils)*(1._wp - 2._wp*d_cbL_${XYZ}$ (0:weno_num_stencils, j)))) elseif (wenoz) then @@ -870,10 +870,10 @@ contains $:GPU_LOOP(parallelism='[seq]') do q = 0, weno_num_stencils alpha(q) = d_cbL_${XYZ}$ (q, j)*(1._wp + (tau/beta(q))) ! Equation 28 (note: weno_eps was already added to beta) - end do + end do elseif (teno) then - ! Fu, et al. (2016) + ! Fu, et al. (2016) ! Fu''s code: https://dx.doi.org/10.13140/RG.2.2.36250.34247 tau = abs(beta(2) - beta(0)) $:GPU_LOOP(parallelism='[seq]') @@ -882,14 +882,14 @@ contains alpha(q) = (alpha(q)**3._wp)**2._wp ! Equation 22 cont. (some CPU compilers cannot optimize x**6.0) end do omega = alpha/sum(alpha) ! Equation 25 (reuse omega as xi) - + $:GPU_LOOP(parallelism='[seq]') do q = 0, weno_num_stencils - if(omega(q) < teno_CT) then ! Equation 26 - delta(q) = 0._wp - else - delta(q) = 1._wp - end if + if (omega(q) < teno_CT) then ! Equation 26 + delta(q) = 0._wp + else + delta(q) = 1._wp + end if alpha(q) = delta(q)*d_cbL_${XYZ}$ (q, j) ! Equation 27 end do end if @@ -910,7 +910,6 @@ contains + poly_coef_cbR_${XYZ}$ (j, 2, 0)*dvd(-1) & + poly_coef_cbR_${XYZ}$ (j, 2, 1)*dvd(-2) - if (wenojs) then alpha(0:weno_num_stencils) = d_cbR_${XYZ}$ (0:weno_num_stencils, j)/(beta(0:weno_num_stencils)**2._wp) @@ -918,19 +917,19 @@ contains alpha(0:weno_num_stencils) = d_cbR_${XYZ}$ (0:weno_num_stencils, j)/(beta(0:weno_num_stencils)**2._wp) omega = alpha/sum(alpha) alpha(0:weno_num_stencils) = (d_cbR_${XYZ}$ (0:weno_num_stencils, j)*(1._wp + d_cbR_${XYZ}$ (0:weno_num_stencils, j) - 3._wp*omega(0:weno_num_stencils)) + omega(0:weno_num_stencils)**2._wp) & - *(omega(0:weno_num_stencils)/(d_cbR_${XYZ}$ (0:weno_num_stencils, j)**2._wp + omega(0:weno_num_stencils)*(1._wp - 2._wp*d_cbR_${XYZ}$ (0:weno_num_stencils, j)))) + *(omega(0:weno_num_stencils)/(d_cbR_${XYZ}$ (0:weno_num_stencils, j)**2._wp + omega(0:weno_num_stencils)*(1._wp - 2._wp*d_cbR_${XYZ}$ (0:weno_num_stencils, j)))) elseif (wenoz) then $:GPU_LOOP(parallelism='[seq]') do q = 0, weno_num_stencils - alpha(q) = d_cbR_${XYZ}$ (q, j)*(1._wp + (tau/beta(q))) - end do + alpha(q) = d_cbR_${XYZ}$ (q, j)*(1._wp + (tau/beta(q))) + end do elseif (teno) then $:GPU_LOOP(parallelism='[seq]') do q = 0, weno_num_stencils - alpha(q) = delta(q)*d_cbR_${XYZ}$ (q, j) + alpha(q) = delta(q)*d_cbR_${XYZ}$ (q, j) end do end if @@ -963,8 +962,8 @@ contains $:GPU_LOOP(parallelism='[seq]') do i = 1, v_size - alpha(:) = 0._wp - omega(:) = 0._wp + alpha(:) = 0._wp + omega(:) = 0._wp delta(:) = 0._wp beta(:) = weno_eps @@ -1078,7 +1077,7 @@ contains alpha(0:weno_num_stencils) = d_cbL_${XYZ}$ (0:weno_num_stencils, j)/(beta(0:weno_num_stencils)**2._wp) omega = alpha/sum(alpha) alpha(0:weno_num_stencils) = (d_cbL_${XYZ}$ (0:weno_num_stencils, j)*(1._wp + d_cbL_${XYZ}$ (0:weno_num_stencils, j) - 3._wp*omega(0:weno_num_stencils)) + omega(0:weno_num_stencils)**2._wp) & - *(omega(0:weno_num_stencils)/(d_cbL_${XYZ}$ (0:weno_num_stencils, j)**2._wp + omega(0:weno_num_stencils)*(1._wp - 2._wp*d_cbL_${XYZ}$ (0:weno_num_stencils, j)))) + *(omega(0:weno_num_stencils)/(d_cbL_${XYZ}$ (0:weno_num_stencils, j)**2._wp + omega(0:weno_num_stencils)*(1._wp - 2._wp*d_cbL_${XYZ}$ (0:weno_num_stencils, j)))) elseif (wenoz) then ! Castro, et al. (2010) @@ -1087,7 +1086,7 @@ contains $:GPU_LOOP(parallelism='[seq]') do q = 0, weno_num_stencils alpha(q) = d_cbL_${XYZ}$ (q, j)*(1._wp + (tau/beta(q))**wenoz_q) ! wenoz_q = 2,3,4 for stability - end do + end do elseif (teno) then #:if not MFC_CASE_OPTIMIZATION or weno_num_stencils > 3 @@ -1095,14 +1094,14 @@ contains alpha = 1._wp + tau/beta alpha = (alpha**3._wp)**2._wp ! some CPU compilers cannot optimize x**6.0 omega = alpha/sum(alpha) - + $:GPU_LOOP(parallelism='[seq]') do q = 0, weno_num_stencils - if(omega(q) < teno_CT) then ! Equation 26 - delta(q) = 0._wp - else - delta(q) = 1._wp - end if + if (omega(q) < teno_CT) then ! Equation 26 + delta(q) = 0._wp + else + delta(q) = 1._wp + end if alpha(q) = delta(q)*d_cbL_${XYZ}$ (q, j) ! Equation 27 end do #:endif @@ -1112,7 +1111,7 @@ contains vL_rs_vf_${XYZ}$ (j, k, l, i) = omega(0)*poly(0) + omega(1)*poly(1) + omega(2)*poly(2) + omega(3)*poly(3) - if(teno) then + if (teno) then #:if not MFC_CASE_OPTIMIZATION or weno_num_stencils > 3 vL_rs_vf_${XYZ}$ (j, k, l, i) = vL_rs_vf_${XYZ}$ (j, k, l, i) + omega(4)*poly(4) #:endif @@ -1152,19 +1151,19 @@ contains alpha(0:weno_num_stencils) = d_cbR_${XYZ}$ (0:weno_num_stencils, j)/(beta(0:weno_num_stencils)**2._wp) omega = alpha/sum(alpha) alpha(0:weno_num_stencils) = (d_cbR_${XYZ}$ (0:weno_num_stencils, j)*(1._wp + d_cbR_${XYZ}$ (0:weno_num_stencils, j) - 3._wp*omega(0:weno_num_stencils)) + omega(0:weno_num_stencils)**2._wp) & - *(omega(0:weno_num_stencils)/(d_cbR_${XYZ}$ (0:weno_num_stencils, j)**2._wp + omega(0:weno_num_stencils)*(1._wp - 2._wp*d_cbR_${XYZ}$ (0:weno_num_stencils, j)))) + *(omega(0:weno_num_stencils)/(d_cbR_${XYZ}$ (0:weno_num_stencils, j)**2._wp + omega(0:weno_num_stencils)*(1._wp - 2._wp*d_cbR_${XYZ}$ (0:weno_num_stencils, j)))) elseif (wenoz) then - $:GPU_LOOP(parallelism='[seq]') + $:GPU_LOOP(parallelism='[seq]') do q = 0, weno_num_stencils alpha(q) = d_cbR_${XYZ}$ (q, j)*(1._wp + (tau/beta(q))**wenoz_q) ! wenoz_q = 2,3,4 for stability - end do + end do elseif (teno) then $:GPU_LOOP(parallelism='[seq]') do q = 0, weno_num_stencils - alpha(q) = delta(q)*d_cbR_${XYZ}$ (q, j) + alpha(q) = delta(q)*d_cbR_${XYZ}$ (q, j) end do end if @@ -1172,7 +1171,7 @@ contains vR_rs_vf_${XYZ}$ (j, k, l, i) = omega(0)*poly(0) + omega(1)*poly(1) + omega(2)*poly(2) + omega(3)*poly(3) - if(teno) then + if (teno) then #:if not MFC_CASE_OPTIMIZATION or weno_num_stencils > 3 vR_rs_vf_${XYZ}$ (j, k, l, i) = vR_rs_vf_${XYZ}$ (j, k, l, i) + omega(4)*poly(4) #:endif From 7d13d5d5a99974fe1192159a2c2a7a31893eaf24 Mon Sep 17 00:00:00 2001 From: Anand Date: Wed, 28 Jan 2026 23:47:09 -0500 Subject: [PATCH 09/21] Cmake cleanup --- CMakeLists.txt | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 082bff3e65..66d7326c3d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -460,9 +460,13 @@ function(MFC_SETUP_TARGET) if (MFC_MPI AND ARGS_MPI) find_package(MPI COMPONENTS Fortran REQUIRED) - target_compile_definitions(${a_target} PRIVATE MFC_MPI) - target_compile_options(${a_target} PRIVATE "$ENV{CRAY_MPICH_INC}") - target_link_libraries (${a_target} PRIVATE $ENV{CRAY_MPICH_LIB}) + target_compile_definitions(${a_target} PRIVATE MFC_MPI) + if(CMAKE_Fortran_COMPILER_ID STREQUAL "LLVMFlang") + target_compile_options(${a_target} PRIVATE "$ENV{CRAY_MPICH_INC}") + target_link_libraries(${a_target} PRIVATE $ENV{CRAY_MPICH_LIB}) + else() + target_link_libraries(${a_target} PRIVATE MPI::MPI_Fortran) + endif() endif() if (ARGS_SILO) @@ -528,8 +532,8 @@ function(MFC_SETUP_TARGET) target_compile_options(${a_target} PRIVATE -fopenmp) target_link_options(${a_target} PRIVATE -fopenmp) elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "LLVMFlang") - target_compile_options(${a_target} PRIVATE -fopenmp --offload-arch=gfx90a) - target_link_options(${a_target} PRIVATE -fopenmp --offload-arch=gfx90a) + target_compile_options(${a_target} PRIVATE -fopenmp --offload-arch=gfx90a) + target_link_options(${a_target} PRIVATE -fopenmp --offload-arch=gfx90a) endif() endif() From eeb5e25c240ba1f7bf26c7b3ffe984cee518d042 Mon Sep 17 00:00:00 2001 From: Anand Date: Thu, 29 Jan 2026 01:45:57 -0500 Subject: [PATCH 10/21] toolchain and CI workarounds --- .github/workflows/bench.yml | 8 ++ .github/workflows/frontier_amd/bench.sh | 22 ++++++ .github/workflows/frontier_amd/build.sh | 26 +++++++ .../workflows/frontier_amd/submit-bench.sh | 54 ++++++++++++++ .github/workflows/frontier_amd/submit.sh | 56 ++++++++++++++ .github/workflows/frontier_amd/test.sh | 20 +++++ .github/workflows/test.yml | 19 +++++ toolchain/bootstrap/modules.sh | 26 ++++++- toolchain/modules | 5 ++ toolchain/templates/frontier_amd.mako | 73 +++++++++++++++++++ 10 files changed, 306 insertions(+), 3 deletions(-) create mode 100644 .github/workflows/frontier_amd/bench.sh create mode 100644 .github/workflows/frontier_amd/build.sh create mode 100644 .github/workflows/frontier_amd/submit-bench.sh create mode 100644 .github/workflows/frontier_amd/submit.sh create mode 100644 .github/workflows/frontier_amd/test.sh create mode 100644 toolchain/templates/frontier_amd.mako diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml index 2ccdfca87a..0653883140 100644 --- a/.github/workflows/bench.yml +++ b/.github/workflows/bench.yml @@ -70,6 +70,14 @@ jobs: device: gpu interface: omp build_script: "bash .github/workflows/frontier/build.sh gpu omp bench" + - cluster: frontier + name: Oak Ridge | Frontier (AMD) + group: phoenix + labels: frontier_amd + flag: famd + device: gpu + interface: omp + build_script: "bash .github/workflows/frontier_amd/build.sh gpu omp bench" runs-on: group: ${{ matrix.group }} labels: ${{ matrix.labels }} diff --git a/.github/workflows/frontier_amd/bench.sh b/.github/workflows/frontier_amd/bench.sh new file mode 100644 index 0000000000..6e01687e79 --- /dev/null +++ b/.github/workflows/frontier_amd/bench.sh @@ -0,0 +1,22 @@ +#!/bin/bash + +n_ranks=12 +device_opts="" +if [ "$job_device" = "gpu" ]; then + gpus=$(rocm-smi --showid | awk '{print $1}' | grep -Eo '[0-9]+' | uniq | tr '\n' ' ') + n_ranks=$(echo "$gpus" | wc -w) # number of GPUs on node + gpu_ids=$(echo "$gpus" | tr ' ' '\n' | tr '\n' ' ' | sed 's/ $//') # GPU IDs from rocm-smi + device_opts+="--gpu" + if [ "$job_interface" = "acc" ]; then + device_opts+=" acc" + elif [ "$job_interface" = "omp" ]; then + device_opts+=" mp" + fi + device_opts+=" -g $gpu_ids" +fi + +if [ "$job_device" = "gpu" ]; then + ./mfc.sh bench --mem 12 -j $n_ranks -o "$job_slug.yaml" -- -c frontier_amd $device_opts -n $n_ranks +else + ./mfc.sh bench --mem 1 -j $(nproc) -o "$job_slug.yaml" -- -c frontier_amd $device_opts -n $n_ranks +fi diff --git a/.github/workflows/frontier_amd/build.sh b/.github/workflows/frontier_amd/build.sh new file mode 100644 index 0000000000..b8b1f7051c --- /dev/null +++ b/.github/workflows/frontier_amd/build.sh @@ -0,0 +1,26 @@ +#!/bin/bash + +job_device=$1 +job_interface=$2 +run_bench=$3 +build_opts="" +if [ "$job_device" = "gpu" ]; then + build_opts+="--gpu" + if [ "$job_interface" = "acc" ]; then + build_opts+=" acc" + elif [ "$job_interface" = "omp" ]; then + build_opts+=" mp" + fi +fi + +. ./mfc.sh load -c famd -m g + +if [ "$run_bench" == "bench" ]; then + for dir in benchmarks/*/; do + dirname=$(basename "$dir") + ./mfc.sh run "$dir/case.py" --case-optimization -j 8 --dry-run $build_opts + done +else + ./mfc.sh test -a --dry-run -j 8 $build_opts +fi + diff --git a/.github/workflows/frontier_amd/submit-bench.sh b/.github/workflows/frontier_amd/submit-bench.sh new file mode 100644 index 0000000000..5445c75aac --- /dev/null +++ b/.github/workflows/frontier_amd/submit-bench.sh @@ -0,0 +1,54 @@ +#!/bin/bash + +set -e + +usage() { + echo "Usage: $0 [script.sh] [cpu|gpu]" +} + +if [ ! -z "$1" ]; then + sbatch_script_contents=`cat $1` +else + usage + exit 1 +fi + +if [ "$2" = "cpu" ]; then + sbatch_device_opts="\ +#SBATCH -n 32 # Number of cores required" +elif [ "$2" = "gpu" ]; then + sbatch_device_opts="\ +#SBATCH -n 8 # Number of cores required" +else + usage; exit 1 +fi + + +job_slug="`basename "$1" | sed 's/\.sh$//' | sed 's/[^a-zA-Z0-9]/-/g'`-$2-$3" + +sbatch < + +% if engine == 'batch': +#SBATCH --nodes=${nodes} +#SBATCH --ntasks-per-node=${tasks_per_node} +#SBATCH --job-name="${name}" +#SBATCH --output="${name}.out" +#SBATCH --time=${walltime} +#SBATCH --cpus-per-task=7 +#SBATCH -C nvme +% if gpu_enabled: +#SBATCH --gpus-per-task=1 +#SBATCH --gpu-bind=closest +% endif +% if account: +#SBATCH --account=${account} +% endif +% if partition: +#SBATCH --partition=${partition} +% endif +% if quality_of_service: +#SBATCH --qos=${quality_of_service} +% endif +% if email: +#SBATCH --mail-user=${email} +#SBATCH --mail-type="BEGIN, END, FAIL" +% endif +% endif + +${helpers.template_prologue()} + +ok ":) Loading modules:\n" +cd "${MFC_ROOT_DIR}" +% if engine == 'batch': +. ./mfc.sh load -c famd -m ${'g' if gpu_enabled else 'c'} +% endif +cd - > /dev/null +echo + +ulimit -s unlimited + +% for target in targets: + ${helpers.run_prologue(target)} + + % if engine == 'batch': + # Broadcast binary to compute nodes + sbcast --send-libs -pf ${target.get_install_binpath(case)} /mnt/bb/$USER/${target.name} + % endif + + % if not mpi: + (set -x; ${profiler} "${target.get_install_binpath(case)}") + % else: + (set -x; srun --unbuffered \ + % if engine == 'interactive': + --unbuffered --nodes ${nodes} --ntasks-per-node ${tasks_per_node} \ + --cpus-per-task 7 \ + % if gpu_enabled: + --gpus-per-task 1 --gpu-bind closest \ + % endif + ${profiler} "${target.get_install_binpath(case)}") + % else: + ${profiler} "/mnt/bb/$USER/${target.name}") + % endif + % endif + + ${helpers.run_epilogue(target)} + + echo +% endfor + +${helpers.template_epilogue()} From 3c3d7f1b0836ede80a9dadcecfb41d59a180555e Mon Sep 17 00:00:00 2001 From: Anand Date: Thu, 29 Jan 2026 01:57:48 -0500 Subject: [PATCH 11/21] cleanup cmake --- CMakeLists.txt | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 66d7326c3d..46f1e17dc8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -185,9 +185,11 @@ elseif (CMAKE_Fortran_COMPILER_ID STREQUAL "Cray") add_link_options("SHELL: -K trap=fp" "SHELL: -G2") endif() -elseif (CMAKE_Fortran_COMPILER_ID STREQUAL "LLVMFlang") +elseif (CMAKE_Fortran_COMPILER_ID STREQUAL "Flang") add_compile_options( - #$<$:-Wall> + $<$:-Mfreeform> + $<$:-Mpreprocess> + $<$:-fdefault-real-8> ) elseif (CMAKE_Fortran_COMPILER_ID STREQUAL "Intel") add_compile_options($<$:-free>) @@ -381,7 +383,7 @@ macro(HANDLE_SOURCES target useCommon useOpenACC useOpenMP) -D MFC_${${target}_UPPER} -D MFC_COMPILER="${CMAKE_Fortran_COMPILER_ID}" -D MFC_GPU_MODE="${MFC_GPU_MODE}" - -D MFC_CASE_OPTIMIZATION=False + -D MFC_CASE_OPTIMIZATION=False -D chemistry=False --line-numbering --no-folding @@ -439,9 +441,9 @@ function(MFC_SETUP_TARGET) foreach (a_target ${IPO_TARGETS}) set_target_properties(${a_target} PROPERTIES Fortran_PREPROCESS ON) - message(STATUS ${CMAKE_Fortran_COMPILER_ID}) + message(STATUS ${CMAKE_Fortran_COMPILER_ID}) - target_include_directories(${a_target} PRIVATE + target_include_directories(${a_target} PRIVATE "${CMAKE_SOURCE_DIR}/src/common" "${CMAKE_SOURCE_DIR}/src/common/include" "${CMAKE_SOURCE_DIR}/src/${ARGS_TARGET}") @@ -484,12 +486,14 @@ function(MFC_SETUP_TARGET) if (CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC" OR CMAKE_Fortran_COMPILER_ID STREQUAL "PGI") find_package(CUDAToolkit REQUIRED) target_link_libraries(${a_target} PRIVATE CUDA::cudart CUDA::cufft) - else() - #find_package(hipfort COMPONENTS hipfft CONFIG REQUIRED) - target_link_libraries(${a_target} PRIVATE $ENV{CRAY_HIPFORT_LIB}) - endif() + elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "LLVMFlang") + target_link_libraries(${a_target} PRIVATE $ENV{CRAY_HIPFORT_LIB}) + else() + find_package(hipfort COMPONENTS hipfft CONFIG REQUIRED) + target_link_libraries(${a_target} PRIVATE hipfort::hipfft) + endif() else() - find_package(FFTW REQUIRED) + find_package(FFTW REQUIRED) target_link_libraries(${a_target} PRIVATE FFTW::FFTW) endif() endif() From 8a60c3065eb1f316eaf48c7837d97d6ca01c4061 Mon Sep 17 00:00:00 2001 From: Anand Date: Thu, 29 Jan 2026 02:04:04 -0500 Subject: [PATCH 12/21] format --- CMakeLists.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 46f1e17dc8..c739106134 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -441,9 +441,9 @@ function(MFC_SETUP_TARGET) foreach (a_target ${IPO_TARGETS}) set_target_properties(${a_target} PROPERTIES Fortran_PREPROCESS ON) - message(STATUS ${CMAKE_Fortran_COMPILER_ID}) + message(STATUS ${CMAKE_Fortran_COMPILER_ID}) - target_include_directories(${a_target} PRIVATE + target_include_directories(${a_target} PRIVATE "${CMAKE_SOURCE_DIR}/src/common" "${CMAKE_SOURCE_DIR}/src/common/include" "${CMAKE_SOURCE_DIR}/src/${ARGS_TARGET}") @@ -603,8 +603,8 @@ function(MFC_SETUP_TARGET) PRIVATE -DFRONTIER_UNIFIED) endif() - find_package(hipfort COMPONENTS hip CONFIG REQUIRED) - target_link_libraries(${a_target} PRIVATE hipfort::hip hipfort::hipfort-amdgcn flang_rt.hostdevice) + find_package(hipfort COMPONENTS hip CONFIG REQUIRED) + target_link_libraries(${a_target} PRIVATE hipfort::hip hipfort::hipfort-amdgcn flang_rt.hostdevice) endif() elseif (CMAKE_Fortran_COMPILER_ID STREQUAL "Cray") target_compile_options(${a_target} PRIVATE "SHELL:-h noacc" "SHELL:-x acc") From d4f1a40ac6506c09a89505364408aa18143f0818 Mon Sep 17 00:00:00 2001 From: Anand Date: Thu, 29 Jan 2026 02:22:30 -0500 Subject: [PATCH 13/21] Merge with latest commit and fix typo --- src/simulation/m_ibm.fpp | 7 +++++-- toolchain/bootstrap/modules.sh | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/simulation/m_ibm.fpp b/src/simulation/m_ibm.fpp index 8c78db6770..68201aa315 100644 --- a/src/simulation/m_ibm.fpp +++ b/src/simulation/m_ibm.fpp @@ -1039,8 +1039,11 @@ contains real(wp), dimension(1:3, 1:3) :: viscous_stress_div, viscous_stress_div_1, viscous_stress_div_2, viscous_cross_1, viscous_cross_2 ! viscous stress tensor with temp vectors to hold divergence calculations real(wp), dimension(1:3) :: local_force_contribution, radial_vector, local_torque_contribution, vel real(wp) :: cell_volume, dx, dy, dz, dynamic_viscosity - real(wp), dimension(1:num_fluids) :: dynamic_viscosities - +#:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(3) :: dynamic_viscosities +#:else + real(wp), dimension(num_fluids) :: dynamic_viscosities +#:endif forces = 0._wp torques = 0._wp diff --git a/toolchain/bootstrap/modules.sh b/toolchain/bootstrap/modules.sh index 5cfea6867d..2185309ada 100644 --- a/toolchain/bootstrap/modules.sh +++ b/toolchain/bootstrap/modules.sh @@ -123,7 +123,7 @@ if [ ! -z ${CRAY_LD_LIBRARY_PATH+x} ] && [ "$u_c" '!=' 'c' ] && [ "$u_c" '!=' ' export LD_LIBRARY_PATH="$CRAY_LD_LIBRARY_PATH:$LD_LIBRARY_PATH" fi -if[ "$u_c" '==' 'famd' ]; then +if [ "$u_c" '==' 'famd' ]; then export OLCF_AFAR_ROOT="/sw/crusher/ums/compilers/afar/rocm-afar-8873-drop-22.2.0" export PATH=${OLCF_AFAR_ROOT}/lib/llvm/bin:${PATH} From 37a9484045eef5277c8856350f8cc09c5745dbfd Mon Sep 17 00:00:00 2001 From: Anand Date: Thu, 29 Jan 2026 02:26:23 -0500 Subject: [PATCH 14/21] format --- src/simulation/m_ibm.fpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/simulation/m_ibm.fpp b/src/simulation/m_ibm.fpp index 68201aa315..5e5e136629 100644 --- a/src/simulation/m_ibm.fpp +++ b/src/simulation/m_ibm.fpp @@ -1039,11 +1039,11 @@ contains real(wp), dimension(1:3, 1:3) :: viscous_stress_div, viscous_stress_div_1, viscous_stress_div_2, viscous_cross_1, viscous_cross_2 ! viscous stress tensor with temp vectors to hold divergence calculations real(wp), dimension(1:3) :: local_force_contribution, radial_vector, local_torque_contribution, vel real(wp) :: cell_volume, dx, dy, dz, dynamic_viscosity -#:if not MFC_CASE_OPTIMIZATION and USING_AMD - real(wp), dimension(3) :: dynamic_viscosities -#:else - real(wp), dimension(num_fluids) :: dynamic_viscosities -#:endif + #:if not MFC_CASE_OPTIMIZATION and USING_AMD + real(wp), dimension(3) :: dynamic_viscosities + #:else + real(wp), dimension(num_fluids) :: dynamic_viscosities + #:endif forces = 0._wp torques = 0._wp From 5457d1182022ffb77d7da6d262e272686b3df60d Mon Sep 17 00:00:00 2001 From: Anand Date: Thu, 29 Jan 2026 04:20:46 -0500 Subject: [PATCH 15/21] Fix regression on EL case (phoenix) --- src/simulation/m_bubbles_EL.fpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/simulation/m_bubbles_EL.fpp b/src/simulation/m_bubbles_EL.fpp index b3917c3488..c391e58cc6 100644 --- a/src/simulation/m_bubbles_EL.fpp +++ b/src/simulation/m_bubbles_EL.fpp @@ -577,7 +577,7 @@ contains ! Radial motion model adap_dt_stop_max = 0 - $:GPU_PARALLEL_LOOP(private='[k,i,myalpha_rho,myalpha,myVapFlux,preterm1, term2, paux, pint, Romega, term1_fac,myR_m, mygamma_m, myPb, myMass_n, myMass_v,myR, myV, myBeta_c, myBeta_t, myR0, myPbdot, myMvdot,myPinf, aux1, aux2, myCson, myRho,gamma,pi_inf,qv,dmalf, dmntait, dmBtait, dm_bub_adv_src, dm_divu,adap_dt_stop]', & + $:GPU_PARALLEL_LOOP(private='[k,i,myalpha_rho,myalpha,Re,cell,myVapFlux,preterm1, term2, paux, pint, Romega, term1_fac,myR_m, mygamma_m, myPb, myMass_n, myMass_v,myR, myV, myBeta_c, myBeta_t, myR0, myPbdot, myMvdot,myPinf, aux1, aux2, myCson, myRho,gamma,pi_inf,qv,dmalf, dmntait, dmBtait, dm_bub_adv_src, dm_divu,adap_dt_stop]', & & reduction='[[adap_dt_stop_max]]',reductionOp='[MAX]', & & copy='[adap_dt_stop_max]',copyin='[stage]') do k = 1, nBubs From 8ccb1943e5e57a864d697b33be08b11d6a81587e Mon Sep 17 00:00:00 2001 From: Anand Date: Thu, 29 Jan 2026 05:59:49 -0500 Subject: [PATCH 16/21] Revert fypp --- CMakeLists.txt | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c739106134..da1ec9adb9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -310,17 +310,7 @@ endif() # and generate documentation. Instead, we can simply include the list of .f90 # files that will eventually be used to compile . -macro(HANDLE_SOURCES target useCommon useOpenACC useOpenMP) - - if (${useOpenACC} AND ${useOpenMP}) - message(FATAL_ERROR "OpenACC and OpenMP at same time is unsupported.") - elseif (${useOpenACC}) - set(MFC_GPU_MODE "OpenACC") - elseif (${useOpenMP}) - set(MFC_GPU_MODE "OpenMP") - else() - set(MFC_GPU_MODE "") - endif() +macro(HANDLE_SOURCES target useCommon) set(${target}_DIR "${CMAKE_SOURCE_DIR}/src/${target}") set(common_DIR "${CMAKE_SOURCE_DIR}/src/common") @@ -382,7 +372,6 @@ macro(HANDLE_SOURCES target useCommon useOpenACC useOpenMP) -D MFC_${CMAKE_Fortran_COMPILER_ID} -D MFC_${${target}_UPPER} -D MFC_COMPILER="${CMAKE_Fortran_COMPILER_ID}" - -D MFC_GPU_MODE="${MFC_GPU_MODE}" -D MFC_CASE_OPTIMIZATION=False -D chemistry=False --line-numbering @@ -400,10 +389,10 @@ macro(HANDLE_SOURCES target useCommon useOpenACC useOpenMP) endmacro() -HANDLE_SOURCES(pre_process ON OFF OFF) -HANDLE_SOURCES(simulation ON ${MFC_OpenACC} ${MFC_OpenMP}) -HANDLE_SOURCES(post_process ON OFF OFF) -HANDLE_SOURCES(syscheck OFF ${MFC_OpenACC} ${MFC_OpenMP}) +HANDLE_SOURCES(pre_process ON) +HANDLE_SOURCES(simulation ON) +HANDLE_SOURCES(post_process ON) +HANDLE_SOURCES(syscheck OFF) # MFC_SETUP_TARGET: Given a target (herein ), this macro creates a new # executable with the appropriate sources, compiler definitions, and From a70b8cd1ac68fc6166b34db27f21b888466dd550 Mon Sep 17 00:00:00 2001 From: Anand Date: Thu, 29 Jan 2026 06:18:14 -0500 Subject: [PATCH 17/21] add amd to file filter --- .github/file-filter.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/file-filter.yml b/.github/file-filter.yml index 6c8fb7216b..a2910c89af 100644 --- a/.github/file-filter.yml +++ b/.github/file-filter.yml @@ -24,6 +24,7 @@ scripts: &scripts yml: &yml - '.github/workflows/phoenix/**' - '.github/workflows/frontier/**' + - '.github/workflows/frontier_amd/**' - '.github/workflows/bench.yml' - '.github/workflows/test.yml' - '.github/workflows/formatting.yml' From 146c2a5e17277fc280034c78f92a46128db52dfb Mon Sep 17 00:00:00 2001 From: Anand Date: Thu, 29 Jan 2026 09:46:42 -0500 Subject: [PATCH 18/21] Checkers for AMD compiler + fixing a typo --- .github/workflows/bench.yml | 4 ++-- .github/workflows/test.yml | 23 +++++++++++++++-------- src/common/m_boundary_common.fpp | 2 +- src/common/m_checker_common.fpp | 14 ++++++++++++++ src/simulation/m_start_up.fpp | 5 +++++ 5 files changed, 37 insertions(+), 11 deletions(-) diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml index 0653883140..1427f9d693 100644 --- a/.github/workflows/bench.yml +++ b/.github/workflows/bench.yml @@ -70,10 +70,10 @@ jobs: device: gpu interface: omp build_script: "bash .github/workflows/frontier/build.sh gpu omp bench" - - cluster: frontier + - cluster: frontier_amd name: Oak Ridge | Frontier (AMD) group: phoenix - labels: frontier_amd + labels: frontier flag: famd device: gpu interface: omp diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index b7a6a6b9e9..bf0cd8e247 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -103,31 +103,38 @@ jobs: include: # Phoenix (GT) - lbl: 'gt' + lbl_comp: 'gt' cluster_name: 'Georgia Tech | Phoenix' device: 'gpu' interface: 'acc' - lbl: 'gt' + lbl_comp: 'gt' cluster_name: 'Georgia Tech | Phoenix' device: 'gpu' interface: 'omp' - lbl: 'gt' + lbl_comp: 'gt' cluster_name: 'Georgia Tech | Phoenix' device: 'cpu' interface: 'none' # Frontier (ORNL) - lbl: 'frontier' + lbl_comp: 'frontier' cluster_name: 'Oak Ridge | Frontier' device: 'gpu' interface: 'acc' - lbl: 'frontier' + lbl_comp: 'frontier' cluster_name: 'Oak Ridge | Frontier' device: 'gpu' interface: 'omp' - lbl: 'frontier' + lbl_comp: 'frontier' cluster_name: 'Oak Ridge | Frontier' device: 'cpu' interface: 'none' - - lbl: 'frontier_amd' + - lbl: 'frontier' + lbl_comp: 'frontier_amd' cluster_name: 'Oak Ridge | Frontier (AMD)' device: 'gpu' interface: 'omp' @@ -143,23 +150,23 @@ jobs: uses: actions/checkout@v4 - name: Build & Test - if: matrix.lbl == 'gt' + if: matrix.lbl_comp == 'gt' run: bash .github/workflows/phoenix/submit.sh .github/workflows/phoenix/test.sh ${{ matrix.device }} ${{ matrix.interface }} - name: Build - if: matrix.lbl == 'frontier' + if: matrix.lbl_comp == 'frontier' run: bash .github/workflows/frontier/build.sh ${{ matrix.device }} ${{ matrix.interface }} - name: Test - if: matrix.lbl == 'frontier' + if: matrix.lbl_comp == 'frontier' run: bash .github/workflows/frontier/submit.sh .github/workflows/frontier/test.sh ${{matrix.device}} ${{ matrix.interface }} - name: Build - if: matrix.lbl == 'frontier_amd' + if: matrix.lbl_comp == 'frontier_amd' run: bash .github/workflows/frontier_amd/build.sh ${{ matrix.device }} ${{ matrix.interface }} - name: Test - if: matrix.lbl == 'frontier_amd' + if: matrix.lbl_comp == 'frontier_amd' run: bash .github/workflows/frontier_amd/submit.sh .github/workflows/frontier_amd/test.sh ${{matrix.device}} ${{ matrix.interface }} - name: Print Logs @@ -168,14 +175,14 @@ jobs: - name: Archive Logs uses: actions/upload-artifact@v4 - if: matrix.lbl == 'frontier' + if: matrix.lbl_comp == 'frontier' with: name: logs-${{ strategy.job-index }}-${{ matrix.device }}-${{ matrix.interface }} path: test-${{ matrix.device }}-${{ matrix.interface }}.out - name: Archive Logs uses: actions/upload-artifact@v4 - if: matrix.lbl == 'frontier_amd' + if: matrix.lbl_comp == 'frontier_amd' with: name: logs-${{ strategy.job-index }}-${{ matrix.device }}-${{ matrix.interface }} path: test-${{ matrix.device }}-${{ matrix.interface }}.out diff --git a/src/common/m_boundary_common.fpp b/src/common/m_boundary_common.fpp index a0c4ed2455..4b5f660bd6 100644 --- a/src/common/m_boundary_common.fpp +++ b/src/common/m_boundary_common.fpp @@ -1558,7 +1558,7 @@ contains end if - #:if not MFC_CASE_OPTIMIZATION or num_dims > 2 + #:if not MFC_CASE_OPTIMIZATION or num_dims > 1 if (n == 0) then return diff --git a/src/common/m_checker_common.fpp b/src/common/m_checker_common.fpp index 4d5b7d80cf..495eada91f 100644 --- a/src/common/m_checker_common.fpp +++ b/src/common/m_checker_common.fpp @@ -2,6 +2,7 @@ !!@file m_checker_common.f90 !!@brief Contains module m_checker_common +#:include 'case.fpp' #:include 'macros.fpp' !> @brief The purpose of the module is to check for compatible input files for. @@ -29,6 +30,9 @@ contains #ifndef MFC_SIMULATION call s_check_total_cells #endif + #:if USING_AMD + call s_check_amd + #:endif end subroutine s_check_inputs_common @@ -46,6 +50,16 @@ contains "which is currently "//trim(numStr)) end subroutine s_check_total_cells + impure subroutine s_check_amd + + #:if not MFC_CASE_OPTIMIZATION + @:PROHIBIT(num_fluids > 3, "num_fluids <= 3 for AMDFLang when Case optimization is off") + @:PROHIBIT((bubbles_euler .or. bubbles_lagrange) .and. nb > 3, "nb <= 3 for AMDFLang when Case optimization is off") + @:PROHIBIT(chemistry .and. num_species /= 10, "num_species = 10 for AMDFLang when Case optimization is off") + #:endif + + end subroutine s_check_amd + #endif #ifndef MFC_POST_PROCESS diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp index d46a3e0793..5e9da9c292 100644 --- a/src/simulation/m_start_up.fpp +++ b/src/simulation/m_start_up.fpp @@ -1301,6 +1301,11 @@ contains real(wp) :: temp1, temp2, temp3, temp4 call s_initialize_global_parameters_module() + #:if not MFC_CASE_OPTIMIZATION and USING_AMD + #:for BC in {-5, -6, -7, -8, -9, -10, -11, -12, -13} + @:PROHIBIT(any((/bc_x%beg, bc_x%end, bc_y%beg, bc_y%end, bc_z%beg, bc_z%end/) == ${BC}$) .and. sys_size > 12, "CBC module with AMD compiler requires sys_size <= 12 when case optimization is turned off") + #:endfor + #:endif if (bubbles_euler .or. bubbles_lagrange) then call s_initialize_bubbles_model() end if From bce4bf6c59d6ed7fafee6cffe21b119b87e884f6 Mon Sep 17 00:00:00 2001 From: Anand Date: Thu, 29 Jan 2026 09:47:30 -0500 Subject: [PATCH 19/21] format --- src/common/m_checker_common.fpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/common/m_checker_common.fpp b/src/common/m_checker_common.fpp index 495eada91f..d98b7592d0 100644 --- a/src/common/m_checker_common.fpp +++ b/src/common/m_checker_common.fpp @@ -52,7 +52,7 @@ contains impure subroutine s_check_amd - #:if not MFC_CASE_OPTIMIZATION + #:if not MFC_CASE_OPTIMIZATION @:PROHIBIT(num_fluids > 3, "num_fluids <= 3 for AMDFLang when Case optimization is off") @:PROHIBIT((bubbles_euler .or. bubbles_lagrange) .and. nb > 3, "nb <= 3 for AMDFLang when Case optimization is off") @:PROHIBIT(chemistry .and. num_species /= 10, "num_species = 10 for AMDFLang when Case optimization is off") From c4af75bc11e225f0feec7f2ead935b74584705f2 Mon Sep 17 00:00:00 2001 From: Anand Date: Thu, 29 Jan 2026 10:05:50 -0500 Subject: [PATCH 20/21] fixing typos --- src/common/m_checker_common.fpp | 4 ++-- src/simulation/m_start_up.fpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/common/m_checker_common.fpp b/src/common/m_checker_common.fpp index d98b7592d0..ff4af7bd20 100644 --- a/src/common/m_checker_common.fpp +++ b/src/common/m_checker_common.fpp @@ -50,6 +50,8 @@ contains "which is currently "//trim(numStr)) end subroutine s_check_total_cells +#endif + impure subroutine s_check_amd #:if not MFC_CASE_OPTIMIZATION @@ -60,8 +62,6 @@ contains end subroutine s_check_amd -#endif - #ifndef MFC_POST_PROCESS #endif diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp index 5e9da9c292..3e690ec3da 100644 --- a/src/simulation/m_start_up.fpp +++ b/src/simulation/m_start_up.fpp @@ -1301,7 +1301,7 @@ contains real(wp) :: temp1, temp2, temp3, temp4 call s_initialize_global_parameters_module() - #:if not MFC_CASE_OPTIMIZATION and USING_AMD + #:if USING_AMD #:for BC in {-5, -6, -7, -8, -9, -10, -11, -12, -13} @:PROHIBIT(any((/bc_x%beg, bc_x%end, bc_y%beg, bc_y%end, bc_z%beg, bc_z%end/) == ${BC}$) .and. sys_size > 12, "CBC module with AMD compiler requires sys_size <= 12 when case optimization is turned off") #:endfor From bd28799067f37d20c941075d5a8d1dcb2346da5c Mon Sep 17 00:00:00 2001 From: Anand Date: Thu, 29 Jan 2026 12:21:31 -0500 Subject: [PATCH 21/21] fix bounds in CBC and add cpu test suite for amdflang --- .github/workflows/test.yml | 5 +++++ src/post_process/m_global_parameters.fpp | 2 +- src/pre_process/m_global_parameters.fpp | 2 +- src/simulation/m_cbc.fpp | 2 +- src/simulation/m_compute_cbc.fpp | 24 ++++++++++++------------ src/simulation/m_global_parameters.fpp | 2 +- src/simulation/m_start_up.fpp | 3 ++- 7 files changed, 23 insertions(+), 17 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index bf0cd8e247..f0dc72783d 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -138,6 +138,11 @@ jobs: cluster_name: 'Oak Ridge | Frontier (AMD)' device: 'gpu' interface: 'omp' + - lbl: 'frontier' + lbl_comp: 'frontier_amd' + cluster_name: 'Oak Ridge | Frontier (AMD)' + device: 'cpu' + interface: 'none' runs-on: group: phoenix labels: ${{ matrix.lbl }} diff --git a/src/post_process/m_global_parameters.fpp b/src/post_process/m_global_parameters.fpp index e4585d011a..3d0ddcef6d 100644 --- a/src/post_process/m_global_parameters.fpp +++ b/src/post_process/m_global_parameters.fpp @@ -245,7 +245,7 @@ module m_global_parameters logical :: E_wrt logical, dimension(num_fluids_max) :: alpha_rho_e_wrt logical :: fft_wrt - logical :: dummy + logical :: dummy !< AMDFlang workaround: keep a dummy logical to avoid a compiler case-optimization bug when a parameter+GPU-kernel conditional is false logical :: pres_wrt logical, dimension(num_fluids_max) :: alpha_wrt logical :: gamma_wrt diff --git a/src/pre_process/m_global_parameters.fpp b/src/pre_process/m_global_parameters.fpp index c867de7562..933bfb6f5b 100644 --- a/src/pre_process/m_global_parameters.fpp +++ b/src/pre_process/m_global_parameters.fpp @@ -300,7 +300,7 @@ module m_global_parameters !! to the next time-step. logical :: fft_wrt - logical :: dummy + logical :: dummy !< AMDFlang workaround: keep a dummy logical to avoid a compiler case-optimization bug when a parameter+GPU-kernel conditional is false contains diff --git a/src/simulation/m_cbc.fpp b/src/simulation/m_cbc.fpp index 4dc7d936fb..012d2358f2 100644 --- a/src/simulation/m_cbc.fpp +++ b/src/simulation/m_cbc.fpp @@ -651,7 +651,7 @@ contains real(wp) :: dqv_dt real(wp) :: dpres_ds #:if USING_AMD - real(wp), dimension(12) :: L + real(wp), dimension(20) :: L #:else real(wp), dimension(sys_size) :: L #:endif diff --git a/src/simulation/m_compute_cbc.fpp b/src/simulation/m_compute_cbc.fpp index 8c6a1d9fca..13bd6a8209 100644 --- a/src/simulation/m_compute_cbc.fpp +++ b/src/simulation/m_compute_cbc.fpp @@ -36,7 +36,7 @@ contains subroutine s_fill_density_L(L, lambda_factor, lambda2, c, mf, dalpha_rho_ds, dpres_ds) $:GPU_ROUTINE(parallelism='[seq]') #:if USING_AMD - real(wp), dimension(12), intent(inout) :: L + real(wp), dimension(20), intent(inout) :: L #:else real(wp), dimension(sys_size), intent(inout) :: L #:endif @@ -59,7 +59,7 @@ contains subroutine s_fill_velocity_L(L, lambda_factor, lambda2, dvel_ds) $:GPU_ROUTINE(parallelism='[seq]') #:if USING_AMD - real(wp), dimension(12), intent(inout) :: L + real(wp), dimension(20), intent(inout) :: L #:else real(wp), dimension(sys_size), intent(inout) :: L #:endif @@ -81,7 +81,7 @@ contains subroutine s_fill_advection_L(L, lambda_factor, lambda2, dadv_ds) $:GPU_ROUTINE(parallelism='[seq]') #:if USING_AMD - real(wp), dimension(12), intent(inout) :: L + real(wp), dimension(20), intent(inout) :: L #:else real(wp), dimension(sys_size), intent(inout) :: L #:endif @@ -103,7 +103,7 @@ contains subroutine s_fill_chemistry_L(L, lambda_factor, lambda2, dYs_ds) $:GPU_ROUTINE(parallelism='[seq]') #:if USING_AMD - real(wp), dimension(12), intent(inout) :: L + real(wp), dimension(20), intent(inout) :: L #:else real(wp), dimension(sys_size), intent(inout) :: L #:endif @@ -130,7 +130,7 @@ contains real(wp), dimension(3), intent(in) :: lambda #:if USING_AMD - real(wp), dimension(12), intent(inout) :: L + real(wp), dimension(20), intent(inout) :: L #:else real(wp), dimension(sys_size), intent(inout) :: L #:endif @@ -154,7 +154,7 @@ contains real(wp), dimension(3), intent(in) :: lambda #:if USING_AMD - real(wp), dimension(12), intent(inout) :: L + real(wp), dimension(20), intent(inout) :: L #:else real(wp), dimension(sys_size), intent(inout) :: L #:endif @@ -194,7 +194,7 @@ contains real(wp), dimension(3), intent(in) :: lambda #:if USING_AMD - real(wp), dimension(12), intent(inout) :: L + real(wp), dimension(20), intent(inout) :: L #:else real(wp), dimension(sys_size), intent(inout) :: L #:endif @@ -217,7 +217,7 @@ contains real(wp), dimension(3), intent(in) :: lambda #:if USING_AMD - real(wp), dimension(12), intent(inout) :: L + real(wp), dimension(20), intent(inout) :: L #:else real(wp), dimension(sys_size), intent(inout) :: L #:endif @@ -250,7 +250,7 @@ contains real(wp), dimension(3), intent(in) :: lambda #:if USING_AMD - real(wp), dimension(12), intent(inout) :: L + real(wp), dimension(20), intent(inout) :: L #:else real(wp), dimension(sys_size), intent(inout) :: L #:endif @@ -280,7 +280,7 @@ contains real(wp), dimension(3), intent(in) :: lambda #:if USING_AMD - real(wp), dimension(12), intent(inout) :: L + real(wp), dimension(20), intent(inout) :: L #:else real(wp), dimension(sys_size), intent(inout) :: L #:endif @@ -308,7 +308,7 @@ contains $:GPU_ROUTINE(function_name='s_compute_supersonic_inflow_L', & & parallelism='[seq]', cray_inline=True) #:if USING_AMD - real(wp), dimension(12), intent(inout) :: L + real(wp), dimension(20), intent(inout) :: L #:else real(wp), dimension(sys_size), intent(inout) :: L #:endif @@ -323,7 +323,7 @@ contains real(wp), dimension(3), intent(in) :: lambda #:if USING_AMD - real(wp), dimension(12), intent(inout) :: L + real(wp), dimension(20), intent(inout) :: L #:else real(wp), dimension(sys_size), intent(inout) :: L #:endif diff --git a/src/simulation/m_global_parameters.fpp b/src/simulation/m_global_parameters.fpp index 122133f844..1b1738a044 100644 --- a/src/simulation/m_global_parameters.fpp +++ b/src/simulation/m_global_parameters.fpp @@ -543,7 +543,7 @@ module m_global_parameters $:GPU_DECLARE(create='[Bx0,powell]') logical :: fft_wrt - logical :: dummy + logical :: dummy !< AMDFlang workaround: keep a dummy logical to avoid a compiler case-optimization bug when a parameter+GPU-kernel conditional is false !> @name Continuum damage model parameters !> @{! diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp index 3e690ec3da..b60ebcd65d 100644 --- a/src/simulation/m_start_up.fpp +++ b/src/simulation/m_start_up.fpp @@ -1303,7 +1303,8 @@ contains call s_initialize_global_parameters_module() #:if USING_AMD #:for BC in {-5, -6, -7, -8, -9, -10, -11, -12, -13} - @:PROHIBIT(any((/bc_x%beg, bc_x%end, bc_y%beg, bc_y%end, bc_z%beg, bc_z%end/) == ${BC}$) .and. sys_size > 12, "CBC module with AMD compiler requires sys_size <= 12 when case optimization is turned off") + @:PROHIBIT(any((/bc_x%beg, bc_x%end, bc_y%beg, bc_y%end, bc_z%beg, bc_z%end/) == ${BC}$) .and. adv_idx%end > 20 .and. (.not. chemistry), "CBC module with AMD compiler requires adv_idx%end <= 20 when case optimization is turned off") + @:PROHIBIT(any((/bc_x%beg, bc_x%end, bc_y%beg, bc_y%end, bc_z%beg, bc_z%end/) == ${BC}$) .and. sys_size > 20 .and. (chemistry), "CBC module with AMD compiler and chemistry requires sys_size <= 20 when case optimization is turned off") #:endfor #:endif if (bubbles_euler .or. bubbles_lagrange) then