-
Notifications
You must be signed in to change notification settings - Fork 31
276 thread exx phi on grid #324
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -60,7 +60,7 @@ subroutine exx_phi_on_grid(inode,atom,spec,extent,xyz,nsuppfuncs,phi_on_grid,r_i | |
| real(double) :: grid_spacing | ||
| real(double) :: x, y, z, r | ||
| real(double) :: int, n, rest | ||
| real(double) :: xyz_delta(3) | ||
| real(double) :: xyz_delta(3), xyz_offset(3) | ||
|
|
||
| integer :: count1, nsf1 | ||
| integer :: ierr, stat | ||
|
|
@@ -155,19 +155,21 @@ subroutine exx_phi_on_grid(inode,atom,spec,extent,xyz,nsuppfuncs,phi_on_grid,r_i | |
| pz = pz -ijk(3)+1 | ||
| end if overlap_box | ||
| !print*, | ||
| xyz_offset = xyz + rst | ||
| !$omp parallel do collapse(3) schedule(runtime) default(none) & | ||
| !$omp shared(mx,my,mz,px,py,pz,grid_spacing,xyz_offset,pao,spec,phi_on_grid,i_dummy,exx_cartesian,extent) & | ||
| !$omp private(nx,ny,nz,x,y,z,count1,l1,acz,m1,pao_val) | ||
| grid_x_loop: do nx = mx, px | ||
| x = xyz(1) + real(nx,double)*grid_spacing + rst(1) | ||
|
|
||
| grid_y_loop: do ny = my, py | ||
| y = xyz(2) + real(ny,double)*grid_spacing + rst(2) | ||
|
|
||
| grid_z_loop: do nz = mz, pz | ||
| z = xyz(3) + real(nz,double)*grid_spacing + rst(3) | ||
| x = nx*grid_spacing + xyz_offset(1) | ||
| y = ny*grid_spacing + xyz_offset(2) | ||
| z = nz*grid_spacing + xyz_offset(3) | ||
|
Comment on lines
+165
to
+167
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is fine. If there's spare time, I'd like to try the following: precompute
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'm not sure this would be worth it. I've just tested the concept with a short program and there seems to be no difference. When accessing xyz in the nested loop, the different values of nx, ny and nz make the memory accesses non-contiguous, so we'll be getting lots of cache misses.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It should be possible to arrange the data such that the nx, ny, nz accesses are contiguous. Maybe I got it wrong in my comment. In principle I agree: if it seems like this isn't worth it, let's not spend much time on it. |
||
|
|
||
| !norm = sqrt((x-xyz(1))**2+(y-xyz(2))**2+(z-xyz(3))**2) | ||
| !if (norm <= r_h) then | ||
|
|
||
| r = sqrt(x*x+y*y+z*z) | ||
| !r = sqrt(x*x+y*y+z*z) | ||
| !if(r < very_small) then | ||
| ! r = zero | ||
| !end if | ||
|
|
@@ -180,10 +182,7 @@ subroutine exx_phi_on_grid(inode,atom,spec,extent,xyz,nsuppfuncs,phi_on_grid,r_i | |
| zeta_loop: do acz = 1, pao(spec)%angmom(l1)%n_zeta_in_angmom | ||
|
|
||
| magn_loop: do m1 = -l1, l1 | ||
|
|
||
| pao_val = zero | ||
| y_val = zero | ||
|
|
||
|
|
||
| call evaluate_pao(i_dummy,spec,l1,acz,m1,x,y,z,pao_val,exx_cartesian) | ||
|
|
||
| ! Put pao_val directly into phi_on_grid | ||
|
|
@@ -198,6 +197,7 @@ subroutine exx_phi_on_grid(inode,atom,spec,extent,xyz,nsuppfuncs,phi_on_grid,r_i | |
| end do grid_z_loop | ||
| end do grid_y_loop | ||
| end do grid_x_loop | ||
| !$omp end parallel do | ||
|
|
||
| end if | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Shorter lines please 🥺