diff --git a/src/exx_kernel_default.f90 b/src/exx_kernel_default.f90 index 3ef219716..9ea194278 100644 --- a/src/exx_kernel_default.f90 +++ b/src/exx_kernel_default.f90 @@ -344,7 +344,7 @@ subroutine get_X_matrix( exxspin, scheme, backup_eris, niter, siter, level ) ! if ( scheme == 1 ) then call exx_mem_alloc(extent,maxsuppfuncs,0,'Phy_k','alloc') - call exx_mem_alloc(extent,maxsuppfuncs,maxsuppfuncs,'Ome_kj','alloc') + call exx_mem_alloc(extent,maxsuppfuncs,maxsuppfuncs,'Ome_kj_1d_buffer','alloc') ! end if ! @@ -799,7 +799,7 @@ subroutine get_X_matrix( exxspin, scheme, backup_eris, niter, siter, level ) ! if ( scheme == 1 ) then call exx_mem_alloc(extent,maxsuppfuncs,0,'Phy_k','dealloc') - call exx_mem_alloc(extent,maxsuppfuncs,maxsuppfuncs,'Ome_kj','dealloc') + call exx_mem_alloc(extent,maxsuppfuncs,maxsuppfuncs,'Ome_kj_1d_buffer','dealloc') ! end if ! @@ -917,6 +917,7 @@ subroutine m_kern_exx_cri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & use numbers, only: zero, one use matrix_module, only: matrix_halo, matrix_trans use global_module, only: area_exx + use GenBlas, only: dot ! use basic_types, only: primary_set use primary_module, only: bundle @@ -935,8 +936,8 @@ subroutine m_kern_exx_cri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & exx_psolver, exx_pscheme, & unit_exx_debug ! - use exx_types, only: phi_i, phi_j, phi_k, phi_l, & - Phy_k, Ome_kj, & + use exx_types, only: phi_i_1d_buffer, phi_j, phi_k, phi_l, & + Phy_k, Ome_kj_1d_buffer, & work_in_3d, work_out_3d use exx_types, only: exx_alloc ! @@ -993,6 +994,8 @@ subroutine m_kern_exx_cri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & real(double) :: dr,dv,K_val real(double) :: exx_mat_elem ! + real(double), pointer :: phi_i(:,:,:,:), Ome_kj(:,:,:) + ! type(prim_atomic_data) :: ia !i_alpha type(neigh_atomic_data) :: jb !j_beta type(neigh_atomic_data) :: kg !k_gamma @@ -1114,10 +1117,11 @@ subroutine m_kern_exx_cri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & ! !print*, 'i',i, 'global_num',ia%ip,'spe',ia%spec ! - if ( exx_alloc ) call exx_mem_alloc(extent,ia%nsup,0,'phi_i','alloc') + if ( exx_alloc ) call exx_mem_alloc(extent,ia%nsup,0,'phi_i_1d_buffer','alloc') + phi_i(1:2*extent+1, 1:2*extent+1, 1:2*extent+1, 1:ia%nsup) => phi_i_1d_buffer ! call exx_phi_on_grid(inode,ia%ip,ia%spec,extent, & - ia%xyz,ia%nsup,phi_i,r_int,xyz_zero) + ia%xyz,ia%nsup,phi_i,r_int,xyz_zero) ! !print*, size(chalo%i_h2d), shape(chalo%i_h2d) ! @@ -1156,15 +1160,17 @@ subroutine m_kern_exx_cri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & call exx_phi_on_grid(inode,jb%global_num,jb%spec,extent, & jb%xyz,jb%nsup,phi_j,r_int,xyz_zero) ! - if ( exx_alloc ) call exx_mem_alloc(extent,0,0,'Ome_kj','alloc') + if ( exx_alloc ) call exx_mem_alloc(extent,0,0,'Ome_kj_1d_buffer','alloc') ! call start_timer(tmr_std_exx_accumul) - !$omp parallel do schedule(runtime) collapse(2) default(none) reduction(+: c) & + !$omp parallel default(none) reduction(+: c) & !$omp shared(kg,jb,tmr_std_exx_poisson,tmr_std_exx_accumul,Phy_k,phi_j,phi_k,ncbeg,ia, & !$omp tmr_std_exx_matmult,ewald_pot,phi_i,exx_psolver,exx_pscheme,extent,dv, & !$omp ewald_rho,inode,pulay_radius,p_omega,p_gauss,w_gauss,reckernel_3d,r_int) & - !$omp private(nsf1,nsf2,work_out_3d,work_in_3d,ewald_charge,Ome_kj,ncaddr,nsf3, & - !$omp exx_mat_elem,r,s,t) + !$omp private(nsf1,nsf2,work_out_3d,work_in_3d,ewald_charge,Ome_kj_1d_buffer,Ome_kj, & + !$omp ncaddr,nsf3,exx_mat_elem,r,s,t) + Ome_kj(1:2*extent+1, 1:2*extent+1, 1:2*extent+1) => Ome_kj_1d_buffer + !$omp do schedule(runtime) collapse(2) do nsf1 = 1, kg%nsup do nsf2 = 1, jb%nsup @@ -1192,31 +1198,19 @@ subroutine m_kern_exx_cri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & ! do nsf3 = 1, ia%nsup ! - exx_mat_elem = zero - ! - do r = 1, 2*extent+1 - do s = 1, 2*extent+1 - do t = 1, 2*extent+1 - - exx_mat_elem = exx_mat_elem & - + phi_i(t,s,r,nsf3) & - * Ome_kj(t,s,r) - - end do - end do - end do - ! - c(ncaddr + nsf3 - 1) = c(ncaddr + nsf3 - 1) + exx_mat_elem * dv + c(ncaddr + nsf3 - 1) = c(ncaddr + nsf3 - 1) & + + dot((2*extent+1)**3, phi_i(:,:,:,nsf3), 1, Ome_kj, 1) * dv ! end do ! nsf3 = 1, ia%nsup ! end do ! nsf2 = 1, jb%nsup end do ! nsf1 = 1, kg%nsup - !$omp end parallel do + !$omp end do + !$omp end parallel ! call stop_timer(tmr_std_exx_accumul,.true.) ! - if ( exx_alloc ) call exx_mem_alloc(extent,0,0,'Ome_kj','dealloc') + if ( exx_alloc ) call exx_mem_alloc(extent,0,0,'Ome_kj_1d_buffer','dealloc') if ( exx_alloc ) call exx_mem_alloc(extent,jb%nsup,0,'phi_j','dealloc') ! end if ! ( ncbeg /=0 ) @@ -1233,7 +1227,7 @@ subroutine m_kern_exx_cri(k_off, kpart, ib_nd_acc, ibaddr, nbnab, & !!$ ****[ i end loop ]**** !!$ ! - if ( exx_alloc ) call exx_mem_alloc(extent,ia%nsup,0,'phi_i','dealloc') + if ( exx_alloc ) call exx_mem_alloc(extent,ia%nsup,0,'phi_i_1d_buffer','dealloc') ! end do ! End of i = 1, at%n_hnab(k_in_halo) ! diff --git a/src/exx_memory.f90 b/src/exx_memory.f90 index 0fc3c8877..51df63ddf 100644 --- a/src/exx_memory.f90 +++ b/src/exx_memory.f90 @@ -20,8 +20,8 @@ module exx_memory use datatypes - use exx_types, ONLY: phi_i, phi_j, phi_k, phi_l - use exx_types, ONLY: Phy_k, Ome_kj + use exx_types, ONLY: phi_i, phi_i_1d_buffer, phi_j, phi_k, phi_l + use exx_types, ONLY: Phy_k, Ome_kj_1d_buffer use exx_types, ONLY: work_in_3d,work_out_3d @@ -106,6 +106,14 @@ subroutine exx_mem_alloc(extent,nsf1,nsf2,matrix,flag,unit,n_neigh,neigh) !write(*,*) '\phi_{i\alpha} allocated', shape(phi_i) ! ! + case('phi_i_1d_buffer') ! allocate phi_i for primary atom + allocate(phi_i_1d_buffer(nsf1*(2*extent+1)*(2*extent+1)*(2*extent+1)), STAT=stat) + if(stat/=0) call cq_abort('Error allocating memory to phi_i_1d_buffer/exx !',stat) + call reg_alloc_mem(area_exx,nsf1*(2*extent+1)*(2*extent+1)*(2*extent+1),& + type_dbl,matrix,lun) + phi_i_1d_buffer = zero + ! + ! case('phi_j') ! allocate phi_j for neighbour atom [Srange] allocate(phi_j(2*extent+1,2*extent+1,2*extent+1,nsf1), STAT=stat) if(stat/=0) call cq_abort('Error allocating memory to phi_j/exx !',stat) @@ -139,13 +147,13 @@ subroutine exx_mem_alloc(extent,nsf1,nsf2,matrix,flag,unit,n_neigh,neigh) !!$ !!$ ! - case('Ome_kj') ! allocate Ome_kj - allocate(Ome_kj(2*extent+1,2*extent+1,2*extent+1), STAT=stat) - if(stat/=0) call cq_abort('Error allocating memory to Ome_kj/exx !',stat) + case('Ome_kj_1d_buffer') ! allocate Ome_kj_1d_buffer + allocate(Ome_kj_1d_buffer((2*extent+1)*(2*extent+1)*(2*extent+1)), STAT=stat) + if(stat/=0) call cq_abort('Error allocating memory to Ome_kj_1d_buffer/exx !',stat) call reg_alloc_mem(area_exx,(2*extent+1)*(2*extent+1)*(2*extent+1),& type_dbl,matrix,lun) - Ome_kj = zero - !write(unit,*) '\Ome_{k\gamma}_{j\beta} allocated' + Ome_kj_1d_buffer = zero + !write(unit,*) '\Ome_{k\gamma}_{j\beta}_1d_buffer allocated' ! ! case('Phy_k')! allocate Phy_k @@ -287,6 +295,14 @@ subroutine exx_mem_alloc(extent,nsf1,nsf2,matrix,flag,unit,n_neigh,neigh) !write(*,*) '\phi_{i\alpha} deallocated' ! ! + case('phi_i_1d_buffer') + deallocate(phi_i_1d_buffer,STAT=stat) + if(stat/=0) call cq_abort('Error deallocating memory to phi_i_1d_buffer/exx !',stat) + call reg_dealloc_mem(area_exx,nsf1*(2*extent+1)*(2*extent+1)*(2*extent+1),& + type_dbl,matrix,lun) + !write(*,*) '\phi_{i\alpha} deallocated' + ! + ! case('phi_j') deallocate(phi_j,STAT=stat) if(stat/=0) call cq_abort('Error deallocating memory to phi_j/exx !',stat) @@ -311,12 +327,12 @@ subroutine exx_mem_alloc(extent,nsf1,nsf2,matrix,flag,unit,n_neigh,neigh) !write(*,*) '\phi_{l\delta} deallocated' ! ! - case('Ome_kj') - deallocate(Ome_kj,STAT=stat) - if(stat/=0) call cq_abort('Error deallocating memory to Ome_kj/exx !',stat) + case('Ome_kj_1d_buffer') + deallocate(Ome_kj_1d_buffer,STAT=stat) + if(stat/=0) call cq_abort('Error deallocating memory to Ome_kj_1d_buffer/exx !',stat) call reg_dealloc_mem(area_exx,(2*extent+1)*(2*extent+1)*(2*extent+1),& type_dbl,matrix,lun) - !write(unit,*) '\Ome_{k\gamma}_{j\beta} deallocated' + !write(unit,*) '\Ome_{k\gamma}_{j\beta}_1d_buffer deallocated' case('Phy_k') deallocate(Phy_k, STAT=stat) diff --git a/src/exx_types.f90 b/src/exx_types.f90 index 3a34979e2..2f77bbbc4 100644 --- a/src/exx_types.f90 +++ b/src/exx_types.f90 @@ -55,14 +55,15 @@ module exx_types ! end type fftw1d ! PAOs on grid - real(double), dimension(:,:,:,:), allocatable :: phi_i - real(double), dimension(:,:,:,:), allocatable :: phi_j - real(double), dimension(:,:,:,:), allocatable :: phi_k - real(double), dimension(:,:,:,:), allocatable :: phi_l + real(double), dimension(:,:,:,:), allocatable :: phi_i + real(double), dimension(:), allocatable, target :: phi_i_1d_buffer + real(double), dimension(:,:,:,:), allocatable :: phi_j + real(double), dimension(:,:,:,:), allocatable :: phi_k + real(double), dimension(:,:,:,:), allocatable :: phi_l ! Auxiliary densities and potentials - real(double), dimension(:,:,:), allocatable :: Ome_kj - real(double), dimension(:,:,:,:), allocatable :: Phy_k + real(double), dimension(:), allocatable, target :: Ome_kj_1d_buffer + real(double), dimension(:,:,:,:), allocatable :: Phy_k