Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
3581843
Compiling with -xAVX
connoraird Jan 29, 2024
5899b49
use libxc library
connoraird Jan 29, 2024
6774d1f
combine three nsf# loops into one and reduce required allocatable mem…
connoraird Jan 29, 2024
e142c54
Initial attempt to thread nsf loop
connoraird Jan 30, 2024
8d859ea
make exx_v_on_grid thread safe
connoraird Feb 1, 2024
8ba4ef9
Thread exx_phi_on_grid xyz loops
connoraird Feb 5, 2024
8fa4bad
Revert "Thread exx_phi_on_grid xyz loops"
connoraird Feb 6, 2024
f046aa2
Thread rst loop
connoraird Feb 6, 2024
47764e7
Timing threaded kernel
connoraird Feb 6, 2024
27b48fb
reseting to nsf loop with collapse
connoraird Feb 6, 2024
6cfcb4f
Removing unused allocatables and excess timers
connoraird Feb 9, 2024
37b6792
abort on errors and stick to minimum line lengths
connoraird Feb 16, 2024
444a377
First attempt
connoraird Feb 7, 2024
a324ddf
Fixing references to phi_i and Ome_kj
connoraird Feb 7, 2024
74df999
Redefine phi_i and Ome_kj
connoraird Feb 7, 2024
69e3efd
Fixing array declarations
connoraird Feb 7, 2024
ece73a0
Cleaning up code
connoraird Feb 7, 2024
1f80cfd
allocate 1d buffers
connoraird Feb 8, 2024
90d0f79
calling correct deallocate and adding print statements
connoraird Feb 8, 2024
a4e986c
Remving typo
connoraird Feb 8, 2024
9792a66
Make Ome_kj_reduced firstprivate
connoraird Feb 8, 2024
f3aab62
Removing print statements
connoraird Feb 8, 2024
8376f5c
point inside parallel region
connoraird Feb 8, 2024
dabf0af
Fixing broken stuff after rebase
connoraird Feb 9, 2024
cfa1b96
Fix broken phi_i allocation
connoraird Feb 9, 2024
6791e7e
make Ome_kj_1d_buffer a target
connoraird Feb 9, 2024
a241c36
Remove extra stop_timer for tmr_std_exx_accumul
connoraird Feb 12, 2024
7eb8dd8
Making Ome_kj private
connoraird Feb 12, 2024
cee9e0f
remove trailing &
connoraird Feb 13, 2024
41c1c9f
Merge pull request #323 from OrderN/276-use-blas
connoraird Feb 16, 2024
f760861
Merge remote-tracking branch 'origin/f-exx-opt' into 276-combine-nsf-…
connoraird Feb 19, 2024
71e7c1e
Add missing 'call' to cq_abort
connoraird Feb 19, 2024
38ace69
Thread xyz loops
connoraird Feb 6, 2024
70b1539
Ending parallel do
connoraird Feb 7, 2024
ffc3025
Adding missing omp variables
connoraird Feb 7, 2024
3dfeabf
Removing unnecessary zeroing of arrays
connoraird Feb 12, 2024
be386f1
Only zeroing if needed
connoraird Feb 12, 2024
442640c
Merge pull request #324 from OrderN/276-thread-exx-phi-on-grid
connoraird Feb 20, 2024
0e3bf8c
cleaning up comments and max memory allocations
connoraird Feb 21, 2024
8ca8295
Adding new tmr_std_exx_nsup timer
connoraird Feb 21, 2024
5a19e49
Remove commented out code
tkoskela Feb 26, 2024
1cbf5f8
Adding documentation on decisions made
connoraird Feb 28, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 15 additions & 23 deletions src/exx_evalpao.f90
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,9 @@ module exx_evalpao
!! Cartesian system is preferred in this case (faster) cf. exx_cartesian = T/F
!! 2023/15/23 14:03 lionel
!! Added dummy argument to evaluate_pao
!!
!! 2024/02/28 11:00 Connor
!! Added OpenMP thread parallelisation around xyz loops
!!
subroutine exx_phi_on_grid(inode,atom,spec,extent,xyz,nsuppfuncs,phi_on_grid,r_int,rst)

use numbers, only: zero, one, two, three, four, five, six, fifteen, sixteen
Expand Down Expand Up @@ -60,7 +62,7 @@ subroutine exx_phi_on_grid(inode,atom,spec,extent,xyz,nsuppfuncs,phi_on_grid,r_i
real(double) :: grid_spacing
real(double) :: x, y, z, r
real(double) :: int, n, rest
real(double) :: xyz_delta(3)
real(double) :: xyz_delta(3), xyz_offset(3)

integer :: count1, nsf1
integer :: ierr, stat
Expand Down Expand Up @@ -154,42 +156,31 @@ subroutine exx_phi_on_grid(inode,atom,spec,extent,xyz,nsuppfuncs,phi_on_grid,r_i
py = py -ijk(2)+1
pz = pz -ijk(3)+1
end if overlap_box
!print*,
xyz_offset = xyz + rst

!$omp parallel do collapse(3) schedule(runtime) default(none) &
!$omp shared(mx,my,mz,px,py,pz,grid_spacing,xyz_offset,pao, &
!$omp spec,phi_on_grid,i_dummy,exx_cartesian,extent) &
!$omp private(nx,ny,nz,x,y,z,count1,l1,acz,m1,pao_val)
grid_x_loop: do nx = mx, px
x = xyz(1) + real(nx,double)*grid_spacing + rst(1)

grid_y_loop: do ny = my, py
y = xyz(2) + real(ny,double)*grid_spacing + rst(2)

grid_z_loop: do nz = mz, pz
z = xyz(3) + real(nz,double)*grid_spacing + rst(3)

!norm = sqrt((x-xyz(1))**2+(y-xyz(2))**2+(z-xyz(3))**2)
!if (norm <= r_h) then

r = sqrt(x*x+y*y+z*z)
!if(r < very_small) then
! r = zero
!end if
!print*, '1 cycle start'
x = nx*grid_spacing + xyz_offset(1)
y = ny*grid_spacing + xyz_offset(2)
z = nz*grid_spacing + xyz_offset(3)

count1 = 1
!sfsum = zero
angu_loop: do l1 = 0, pao(spec)%greatest_angmom

zeta_loop: do acz = 1, pao(spec)%angmom(l1)%n_zeta_in_angmom

magn_loop: do m1 = -l1, l1

pao_val = zero
y_val = zero


call evaluate_pao(i_dummy,spec,l1,acz,m1,x,y,z,pao_val,exx_cartesian)

! Put pao_val directly into phi_on_grid
! (only for primitive PAOs and not for blips)
phi_on_grid(nx+extent+1,ny+extent+1,nz+extent+1,count1) = pao_val
!print*, x, pao_val
count1 = count1 + 1
end do magn_loop
end do zeta_loop
Expand All @@ -198,6 +189,7 @@ subroutine exx_phi_on_grid(inode,atom,spec,extent,xyz,nsuppfuncs,phi_on_grid,r_i
end do grid_z_loop
end do grid_y_loop
end do grid_x_loop
!$omp end parallel do

end if

Expand Down
Loading