Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
ec0c730
Consolidating OpenACC device-host memory transfers
abishekg7 May 13, 2025
e028816
Fixing bug associated with rho_zz_2 not being copied out at the end o…
abishekg7 May 22, 2025
e3365be
Moving some OpenACC data movements to subroutines
abishekg7 Jun 14, 2025
86e0c30
Removing acc data xfer timers for device variables using create/delete
abishekg7 Jul 3, 2025
2ca728a
Using acc declare create for rho_zz_int and corresponding cleanup
abishekg7 Jul 8, 2025
fe17e5e
Removing atm_advance_scalars_mono ACC_data_xfer timers around create/…
abishekg7 Jul 8, 2025
6084ed4
Simplifying OpenACC data transfers around the call to mpas_reconstruc…
abishekg7 Jul 8, 2025
231ede7
Need to copyout u_2 and w_2 at the end of dynamics
abishekg7 Aug 14, 2025
fb72eda
Fixes to produce correct results with CURVATURE
abishekg7 Aug 22, 2025
24931e5
Adding option to enable GPU execution of mpas_reconstruct_2d
abishekg7 Oct 3, 2025
6135ade
fixes needed with intel compiler
abishekg7 Oct 13, 2025
8388ad3
Make ACC data movements of lbc variables contingent on config_apply_lbcs
abishekg7 Nov 12, 2025
738138a
Some more fixes to get GPU runs working
abishekg7 Feb 24, 2026
aa48f49
Renaming subroutines as per review comment
abishekg7 Mar 3, 2026
164edb7
Add data movement for some fields under the mpas_halo_groups
May 7, 2025
608a853
Add a data region and acc kernels to the 2D packing code
May 7, 2025
e1fcb46
Add the update directives that should have been part of the last commit
May 7, 2025
5b66082
Comment out data present region, see if this causes an error
May 7, 2025
d8ead8d
Expand the data managed on the GPU for the halo exchange
May 7, 2025
bac090a
Remove the OpenACC management of recvBuf
May 7, 2025
676ef26
Add update host(sendBuf) back, address answer diff
May 7, 2025
269ff35
Expand to other packing kernels, only update sendBuf after packing fi…
May 7, 2025
dbb7128
Change to simple integers to access the buffers and the field arrays
May 7, 2025
afc875c
Add kernels to unpacking loops and use a data present region to try t…
May 8, 2025
447f453
Change from data copyin regions to enter/exit directives for the r?ar…
May 8, 2025
f79f3da
Re-enable update host for sendBuf, add update device recvBuf
May 8, 2025
8f48d2d
Remove update directives, use acc host_data use_device(...) near MPI …
May 8, 2025
9061a4f
checkpoints: acc pack + cuda aware mpi working
abishekg7 Aug 6, 2025
187e06a
seems to be working
abishekg7 Aug 7, 2025
882d5cc
Optimized packing and unpacking loops. Adding timers and other cleanup
abishekg7 Aug 7, 2025
f5e8bbb
Working savepoint
abishekg7 Aug 12, 2025
0d979b0
u_2 and w_2 need to be copied out after dynamics + cleanup
abishekg7 Aug 13, 2025
b04d0d4
using attach in a directive instead of the acc_attach library call
abishekg7 Aug 13, 2025
b39fd34
Using attach clause in parallel region will also auto detach at end o…
abishekg7 Aug 13, 2025
6d449a2
Reverting the indexing in loops and comment cleanup
abishekg7 Aug 13, 2025
4d769ef
New namelist option to switch on or off GPU-Aware MPI
abishekg7 Aug 13, 2025
009f942
Adding a dependency to mpas_timer.o in mpas_halo.o
abishekg7 Sep 25, 2025
3d2c334
Adding option to choose GPU-direct halo exchanges in abstract interface
abishekg7 Mar 4, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/core_atmosphere/Registry.xml
Original file line number Diff line number Diff line change
Expand Up @@ -443,6 +443,10 @@
units="-"
description="Method to use for exchanging halos"
possible_values="`mpas_dmpar', `mpas_halo'"/>
<nml_option name="config_gpu_aware_mpi" type="logical" default_value="false"
units="-"
description="Whether to use GPU-aware MPI for halo exchanges"
possible_values=".true. or .false."/>
</nml_record>

<!-- **************************************************************************************** -->
Expand Down
32 changes: 0 additions & 32 deletions src/core_atmosphere/dynamics/mpas_atm_boundaries.F
Original file line number Diff line number Diff line change
Expand Up @@ -395,18 +395,14 @@ subroutine mpas_atm_get_bdy_tend(clock, block, vertDim, horizDim, field, delta_t
nullify(tend)
call mpas_pool_get_array(lbc, 'lbc_'//trim(field), tend, 1)

MPAS_ACC_TIMER_START('mpas_atm_get_bdy_tend [ACC_data_xfer]')
if (associated(tend)) then
!$acc enter data copyin(tend)
else
call mpas_pool_get_array(lbc, 'lbc_scalars', tend_scalars, 1)
!$acc enter data copyin(tend_scalars)

! Ensure the integer pointed to by idx_ptr is copied to the gpu device
call mpas_pool_get_dimension(lbc, 'index_'//trim(field), idx_ptr)
idx = idx_ptr
end if
MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_tend [ACC_data_xfer]')

!$acc parallel default(present)
if (associated(tend)) then
Expand All @@ -426,13 +422,6 @@ subroutine mpas_atm_get_bdy_tend(clock, block, vertDim, horizDim, field, delta_t
end if
!$acc end parallel

MPAS_ACC_TIMER_START('mpas_atm_get_bdy_tend [ACC_data_xfer]')
if (associated(tend)) then
!$acc exit data delete(tend)
else
!$acc exit data delete(tend_scalars)
end if
MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_tend [ACC_data_xfer]')

end subroutine mpas_atm_get_bdy_tend

Expand Down Expand Up @@ -533,9 +522,6 @@ subroutine mpas_atm_get_bdy_state_2d(clock, block, vertDim, horizDim, field, del
! query the field as a scalar constituent
!
if (associated(tend) .and. associated(state)) then
MPAS_ACC_TIMER_START('mpas_atm_get_bdy_state_2d [ACC_data_xfer]')
!$acc enter data copyin(tend, state)
MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_state_2d [ACC_data_xfer]')

!$acc parallel default(present)
!$acc loop gang vector collapse(2)
Expand All @@ -546,20 +532,13 @@ subroutine mpas_atm_get_bdy_state_2d(clock, block, vertDim, horizDim, field, del
end do
!$acc end parallel

MPAS_ACC_TIMER_START('mpas_atm_get_bdy_state_2d [ACC_data_xfer]')
!$acc exit data delete(tend, state)
MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_state_2d [ACC_data_xfer]')
else
call mpas_pool_get_array(lbc, 'lbc_scalars', tend_scalars, 1)
call mpas_pool_get_array(lbc, 'lbc_scalars', state_scalars, 2)
call mpas_pool_get_dimension(lbc, 'index_'//trim(field), idx_ptr)

idx=idx_ptr ! Avoid non-array pointer for OpenACC

MPAS_ACC_TIMER_START('mpas_atm_get_bdy_state_2d [ACC_data_xfer]')
!$acc enter data copyin(tend_scalars, state_scalars)
MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_state_2d [ACC_data_xfer]')

!$acc parallel default(present)
!$acc loop gang vector collapse(2)
do i=1, horizDim+1
Expand All @@ -569,9 +548,6 @@ subroutine mpas_atm_get_bdy_state_2d(clock, block, vertDim, horizDim, field, del
end do
!$acc end parallel

MPAS_ACC_TIMER_START('mpas_atm_get_bdy_state_2d [ACC_data_xfer]')
!$acc exit data delete(tend_scalars, state_scalars)
MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_state_2d [ACC_data_xfer]')
end if

end subroutine mpas_atm_get_bdy_state_2d
Expand Down Expand Up @@ -652,10 +628,6 @@ subroutine mpas_atm_get_bdy_state_3d(clock, block, innerDim, vertDim, horizDim,
call mpas_pool_get_array(lbc, 'lbc_'//trim(field), tend, 1)
call mpas_pool_get_array(lbc, 'lbc_'//trim(field), state, 2)

MPAS_ACC_TIMER_START('mpas_atm_get_bdy_state_3d [ACC_data_xfer]')
!$acc enter data copyin(tend, state)
MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_state_3d [ACC_data_xfer]')

!$acc parallel default(present)
!$acc loop gang vector collapse(3)
do i=1, horizDim+1
Expand All @@ -667,10 +639,6 @@ subroutine mpas_atm_get_bdy_state_3d(clock, block, innerDim, vertDim, horizDim,
end do
!$acc end parallel

MPAS_ACC_TIMER_START('mpas_atm_get_bdy_state_3d [ACC_data_xfer]')
!$acc exit data delete(tend, state)
MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_state_3d [ACC_data_xfer]')

end subroutine mpas_atm_get_bdy_state_3d


Expand Down
47 changes: 45 additions & 2 deletions src/core_atmosphere/dynamics/mpas_atm_iau.F
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,15 @@
! Additional copyright and license information can be found in the LICENSE file
! distributed with this code, or at http://mpas-dev.github.com/license.html
!

! Timer macros for OpenACC data-transfer regions.
! When MPAS_OPENACC is defined, MPAS_ACC_TIMER_START(X) and MPAS_ACC_TIMER_STOP(X)
! expand to calls to mpas_timer_start(X) and mpas_timer_stop(X); otherwise they
! expand to nothing, so the timer calls vanish from builds without MPAS_OPENACC.
#ifdef MPAS_OPENACC
#define MPAS_ACC_TIMER_START(X) call mpas_timer_start(X)
#define MPAS_ACC_TIMER_STOP(X) call mpas_timer_stop(X)
#else
#define MPAS_ACC_TIMER_START(X)
#define MPAS_ACC_TIMER_STOP(X)
#endif

module mpas_atm_iau

use mpas_derived_types
Expand All @@ -13,9 +22,10 @@ module mpas_atm_iau
use mpas_dmpar
use mpas_constants
use mpas_log, only : mpas_log_write
use mpas_timer

!public :: atm_compute_iau_coef, atm_add_tend_anal_incr

!public :: atm_compute_iau_coef, atm_add_tend_anal_incr

contains

!==================================================================================================
Expand Down Expand Up @@ -76,6 +86,39 @@ real (kind=RKIND) function atm_iau_coef(configs, itimestep, dt) result(wgt_iau)
end if

end function atm_iau_coef

!==================================================================================================
subroutine update_d2h_pre_add_tend_anal_incr(configs,structs)
!==================================================================================================
!
! Looks up theta_m (time level 1), scalars (time level 1) and rho_zz (time level 2)
! in the state subpool, rho_edge in the diag subpool and scalars_tend in the tend
! subpool of structs, then issues OpenACC "update self" directives for those arrays.
!
! The array lookups and the update directives are bracketed by the
! MPAS_ACC_TIMER_START / MPAS_ACC_TIMER_STOP macros.
!
! Arguments:
!   configs - configuration pool; not referenced in the body of this routine
!   structs - pool holding the tend, state and diag subpools
!
! NOTE(review): judging by the routine name, this is meant to be called before
! atm_add_tend_anal_incr so that host copies are current - confirm against the caller.
!
    implicit none

    type (mpas_pool_type), intent(in) :: configs
    type (mpas_pool_type), intent(inout) :: structs

    ! Subpools retrieved from structs
    type (mpas_pool_type), pointer :: state_pool, diag_pool, tend_pool

    ! Fields named in the update directives below
    real (kind=RKIND), dimension(:,:), pointer :: theta_m, rho_zz, rho_edge
    real (kind=RKIND), dimension(:,:,:), pointer :: scalars, scalars_tend

    call mpas_pool_get_subpool(structs, 'tend', tend_pool)
    call mpas_pool_get_subpool(structs, 'state', state_pool)
    call mpas_pool_get_subpool(structs, 'diag', diag_pool)

    MPAS_ACC_TIMER_START('atm_srk3: physics ACC_data_xfer')

    ! State and diagnostic fields
    call mpas_pool_get_array(diag_pool, 'rho_edge', rho_edge)
    call mpas_pool_get_array(state_pool, 'rho_zz', rho_zz, 2)
    call mpas_pool_get_array(state_pool, 'scalars', scalars, 1)
    call mpas_pool_get_array(state_pool, 'theta_m', theta_m, 1)

    ! Scalar tendencies
    call mpas_pool_get_array(tend_pool, 'scalars_tend', scalars_tend)

    !$acc update self(theta_m, scalars, rho_zz, rho_edge)
    !$acc update self(scalars_tend)

    MPAS_ACC_TIMER_STOP('atm_srk3: physics ACC_data_xfer')

end subroutine update_d2h_pre_add_tend_anal_incr

!==================================================================================================
subroutine atm_add_tend_anal_incr (configs, structs, itimestep, dt, tend_ru, tend_rtheta, tend_rho)
Expand Down
Loading