Skip to content
104 changes: 31 additions & 73 deletions src/simulation/m_ibm.fpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,15 +45,13 @@ module m_ibm
$:GPU_DECLARE(create='[ib_markers]')

type(ghost_point), dimension(:), allocatable :: ghost_points
type(ghost_point), dimension(:), allocatable :: inner_points
$:GPU_DECLARE(create='[ghost_points,inner_points]')
$:GPU_DECLARE(create='[ghost_points]')

integer :: num_gps !< Number of ghost points
integer :: num_inner_gps !< Number of ghost points
#if defined(MFC_OpenACC)
$:GPU_DECLARE(create='[gp_layers,num_gps,num_inner_gps]')
$:GPU_DECLARE(create='[gp_layers,num_gps]')
#elif defined(MFC_OpenMP)
$:GPU_DECLARE(create='[num_gps,num_inner_gps]')
$:GPU_DECLARE(create='[num_gps]')
#endif
logical :: moving_immersed_boundary_flag

Expand All @@ -74,7 +72,7 @@ contains

@:ACC_SETUP_SFs(ib_markers)

$:GPU_ENTER_DATA(copyin='[num_gps,num_inner_gps]')
$:GPU_ENTER_DATA(copyin='[num_gps]')

Comment on lines +75 to 76
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Missing GPU_EXIT_DATA for num_gps.

num_gps is entered to GPU data region here but there's no corresponding GPU_EXIT_DATA in s_finalize_ibm_module to release the device memory.

end subroutine s_initialize_ibm_module

Expand All @@ -83,7 +81,7 @@ contains
impure subroutine s_ibm_setup()

integer :: i, j, k
integer :: max_num_gps, max_num_inner_gps
integer :: max_num_gps

call nvtxStartRange("SETUP-IBM-MODULE")

Expand Down Expand Up @@ -118,19 +116,20 @@ contains
end do

! find the number of ghost points and set them to be the maximum total across ranks
call s_find_num_ghost_points(num_gps, num_inner_gps)
call s_mpi_allreduce_integer_sum(num_gps, max_num_gps)
call s_mpi_allreduce_integer_sum(num_inner_gps, max_num_inner_gps)
max_num_gps = min(max_num_gps, (m + 1)*(n + 1)*(p + 1)/2)
max_num_inner_gps = min(max_num_inner_gps, (m + 1)*(n + 1)*(p + 1)/2)
call s_find_num_ghost_points(num_gps)
if (moving_immersed_boundary_flag) then
call s_mpi_allreduce_integer_sum(num_gps, max_num_gps)
max_num_gps = min(max_num_gps*2, (m + 1)*(n + 1)*(p + 1))
else
max_num_gps = num_gps
end if

! set the size of the ghost point arrays to be the amount of points total, plus a factor of 2 buffer
$:GPU_UPDATE(device='[num_gps, num_inner_gps]')
@:ALLOCATE(ghost_points(1:int((max_num_gps + max_num_inner_gps) * 2.0)))
@:ALLOCATE(inner_points(1:int((max_num_gps + max_num_inner_gps) * 2.0)))
$:GPU_UPDATE(device='[num_gps]')
@:ALLOCATE(ghost_points(1:max_num_gps))

$:GPU_ENTER_DATA(copyin='[ghost_points,inner_points]')
call s_find_ghost_points(ghost_points, inner_points)
$:GPU_ENTER_DATA(copyin='[ghost_points]')
call s_find_ghost_points(ghost_points)
Comment on lines +129 to +132
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Missing @:DEALLOCATE and GPU_EXIT_DATA for ghost_points.

ghost_points is allocated here with @:ALLOCATE and entered to GPU with GPU_ENTER_DATA, but s_finalize_ibm_module does not include matching cleanup calls. This will cause a memory leak on both host and device.

🔧 Proposed fix in s_finalize_ibm_module
 impure subroutine s_finalize_ibm_module()

+    $:GPU_EXIT_DATA(delete='[ghost_points, num_gps]')
+    @:DEALLOCATE(ghost_points)
     @:DEALLOCATE(ib_markers%sf)
     if (allocated(airfoil_grid_u)) then
         @:DEALLOCATE(airfoil_grid_u)
         @:DEALLOCATE(airfoil_grid_l)
     end if

 end subroutine s_finalize_ibm_module

As per coding guidelines: "Every @:ALLOCATE(...) macro call must have a matching @:DEALLOCATE(...) in Fortran files" and "Lifecycle of allocations must be paired with deallocations; ensure all data entering GPU memory has corresponding exit/cleanup."

call s_apply_levelset(ghost_points, num_gps)

call s_compute_image_points(ghost_points)
Expand Down Expand Up @@ -200,18 +199,16 @@ contains
patch_id = ib_markers%sf(j, k, l)
if (patch_id /= 0) then
q_prim_vf(E_idx)%sf(j, k, l) = 1._wp
if (patch_ib(patch_id)%moving_ibm > 0) then
rho = 0._wp
do i = 1, num_fluids
rho = rho + q_prim_vf(contxb + i - 1)%sf(j, k, l)
end do
rho = 0._wp
do i = 1, num_fluids
rho = rho + q_prim_vf(contxb + i - 1)%sf(j, k, l)
end do

! Sets the momentum
do i = 1, num_dims
q_cons_vf(momxb + i - 1)%sf(j, k, l) = patch_ib(patch_id)%vel(i)*rho
q_prim_vf(momxb + i - 1)%sf(j, k, l) = patch_ib(patch_id)%vel(i)
end do
end if
! Sets the momentum
do i = 1, num_dims
q_cons_vf(momxb + i - 1)%sf(j, k, l) = patch_ib(patch_id)%vel(i)*rho
q_prim_vf(momxb + i - 1)%sf(j, k, l) = patch_ib(patch_id)%vel(i)
end do
end if
end do
end do
Expand Down Expand Up @@ -401,24 +398,6 @@ contains
$:END_GPU_PARALLEL_LOOP()
end if

!Correct the state of the inner points in IBs
if (num_inner_gps > 0) then
$:GPU_PARALLEL_LOOP(private='[i,physical_loc,dyn_pres,alpha_rho_IP, alpha_IP,vel_g,rho,gamma,pi_inf,Re_K,innerp,j,k,l,q]')
do i = 1, num_inner_gps

innerp = inner_points(i)
j = innerp%loc(1)
k = innerp%loc(2)
l = innerp%loc(3)

$:GPU_LOOP(parallelism='[seq]')
do q = momxb, momxe
q_cons_vf(q)%sf(j, k, l) = 0._wp
end do
end do
$:END_GPU_PARALLEL_LOOP()
end if

end subroutine s_ibm_correct_state

!> Function that computes the image points for each ghost point
Expand Down Expand Up @@ -533,21 +512,19 @@ contains

!> Subroutine that finds the number of ghost points, used for allocating
!! memory.
subroutine s_find_num_ghost_points(num_gps_out, num_inner_gps_out)
subroutine s_find_num_ghost_points(num_gps_out)

integer, intent(out) :: num_gps_out
integer, intent(out) :: num_inner_gps_out

integer :: i, j, k, ii, jj, kk, gp_layers_z !< Iterator variables
integer :: num_gps_local, num_inner_gps_local !< local copies of the gp count to support GPU compute
integer :: num_gps_local !< local copies of the gp count to support GPU compute
logical :: is_gp

num_gps_local = 0
num_inner_gps_local = 0
gp_layers_z = gp_layers
if (p == 0) gp_layers_z = 0

$:GPU_PARALLEL_LOOP(private='[i,j,k,ii,jj,kk,is_gp]', copy='[num_gps_local,num_inner_gps_local]', firstprivate='[gp_layers,gp_layers_z]', collapse=3)
$:GPU_PARALLEL_LOOP(private='[i,j,k,ii,jj,kk,is_gp]', copy='[num_gps_local]', firstprivate='[gp_layers,gp_layers_z]', collapse=3)
do i = 0, m
do j = 0, n
do k = 0, p
Expand All @@ -568,9 +545,6 @@ contains
if (is_gp) then
$:GPU_ATOMIC(atomic='update')
num_gps_local = num_gps_local + 1
else
$:GPU_ATOMIC(atomic='update')
num_inner_gps_local = num_inner_gps_local + 1
end if
end if
end do
Expand All @@ -579,15 +553,13 @@ contains
$:END_GPU_PARALLEL_LOOP()

num_gps_out = num_gps_local
num_inner_gps_out = num_inner_gps_local

end subroutine s_find_num_ghost_points

!> Function that finds the ghost points
subroutine s_find_ghost_points(ghost_points_in, inner_points_in)
subroutine s_find_ghost_points(ghost_points_in)

type(ghost_point), dimension(num_gps), intent(INOUT) :: ghost_points_in
type(ghost_point), dimension(num_inner_gps), intent(INOUT) :: inner_points_in
integer :: i, j, k, ii, jj, kk, gp_layers_z !< Iterator variables
integer :: xp, yp, zp !< periodicities
integer :: count, count_i, local_idx
Expand Down Expand Up @@ -658,20 +630,6 @@ contains
ghost_points_in(local_idx)%DB(3) = 0
end if
end if

else
$:GPU_ATOMIC(atomic='capture')
count_i = count_i + 1
local_idx = count_i
$:END_GPU_ATOMIC_CAPTURE()

inner_points_in(local_idx)%loc = [i, j, k]
encoded_patch_id = ib_markers%sf(i, j, k)
call s_decode_patch_periodicity(encoded_patch_id, patch_id, xp, yp, zp)
inner_points_in(local_idx)%ib_patch_id = patch_id
ib_markers%sf(i, j, k) = patch_id
inner_points_in(local_idx)%slip = patch_ib(patch_id)%slip

end if
end if
end do
Expand Down Expand Up @@ -972,8 +930,8 @@ contains

call nvtxStartRange("COMPUTE-GHOST-POINTS")
! recalculate the ghost point locations and coefficients
call s_find_num_ghost_points(num_gps, num_inner_gps)
call s_find_ghost_points(ghost_points, inner_points)
call s_find_num_ghost_points(num_gps)
call s_find_ghost_points(ghost_points)
call nvtxEndRange

call nvtxStartRange("COMPUTE-IMAGE-POINTS")
Expand Down
Loading