Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/multiply_kernel_default.f90
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,7 @@ subroutine m_kern_max(k_off, kpart, ib_nd_acc, ibaddr, nbnab, &
integer :: nd1, nd2, nd3
integer :: naaddr, nbaddr, ncaddr

!$omp single
! Loop over atoms k in current A-halo partn
do k = 1, ahalo%nh_part(kpart)
k_in_halo = ahalo%j_beg(kpart) + k - 1
Expand Down Expand Up @@ -216,6 +217,7 @@ subroutine m_kern_max(k_off, kpart, ib_nd_acc, ibaddr, nbnab, &
nabeg = nabeg + nd1 * nd3
end do ! End of i = 1, at%n_hnab
end do ! End of k = 1, nahpart
!$omp end single
return
end subroutine m_kern_max
!!*****
Expand Down Expand Up @@ -353,6 +355,7 @@ subroutine m_kern_min(k_off, kpart, ib_nd_acc, ibaddr, nbnab, &
integer :: nd1, nd2, nd3
integer :: naaddr, nbaddr, ncaddr

!$omp single
! Loop over atoms k in current A-halo partn
do k = 1, ahalo%nh_part(kpart)
k_in_halo = ahalo%j_beg(kpart) + k - 1
Expand Down Expand Up @@ -402,6 +405,7 @@ subroutine m_kern_min(k_off, kpart, ib_nd_acc, ibaddr, nbnab, &
nabeg = nabeg + nd1 * nd3
end do
end do
!$omp end single
return
end subroutine m_kern_min
!!*****
Expand Down
4 changes: 4 additions & 0 deletions src/multiply_kernel_gemm.f90
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,7 @@ subroutine m_kern_max(k_off, kpart, ib_nd_acc, ibaddr, nbnab, &
integer :: sofar, maxlen, max2, prend1
external :: dgemm

!$omp single
allocate(tempa(1,1), tempc(1,1))
do k = 1, ahalo%nh_part(kpart) ! Loop over atoms k in current A-halo partn
k_in_halo = ahalo%j_beg(kpart) + k - 1
Expand Down Expand Up @@ -273,6 +274,7 @@ subroutine m_kern_max(k_off, kpart, ib_nd_acc, ibaddr, nbnab, &
end do ! end of k = 1, nahpart
if (allocated(tempa)) deallocate(tempa)
if (allocated(tempc)) deallocate(tempc)
!$omp end single
return
end subroutine m_kern_max
!!*****
Expand Down Expand Up @@ -414,6 +416,7 @@ subroutine m_kern_min(k_off, kpart, ib_nd_acc, ibaddr, nbnab, &
real(double), allocatable, dimension(:,:) :: tempb, tempc
external :: dgemm

!$omp single
do k = 1, ahalo%nh_part(kpart) ! Loop over atoms k in current A-halo partn
k_in_halo = ahalo%j_beg(kpart) + k - 1
k_in_part = ahalo%j_seq(k_in_halo)
Expand Down Expand Up @@ -481,6 +484,7 @@ subroutine m_kern_min(k_off, kpart, ib_nd_acc, ibaddr, nbnab, &
nabeg = nabeg + nd1 * nd3
end do
end do
!$omp end single
return
end subroutine m_kern_min
!!*****
Expand Down
18 changes: 0 additions & 18 deletions src/multiply_kernel_ompDoii.f90
Original file line number Diff line number Diff line change
Expand Up @@ -184,14 +184,6 @@ subroutine m_kern_max(k_off, kpart, ib_nd_acc, ibaddr, nbnab, &
! OpenMP required indexing variables
integer :: nd1_1st(at%mx_halo), nd2_1st(mx_absb)

!$omp parallel default(none) &
!$omp shared(kpart, ibaddr, ib_nd_acc, nbnab, ibpart, ibseq, &
!$omp k_off, bndim2, mx_absb, mx_part, at, ahalo, chalo, &
!$omp a, b, c) &
!$omp private(i, j, k, j_in_halo, k_in_halo, k_in_part, nbkbeg, &
!$omp nb_nd_kbeg, nd1, nd2, nd3, jpart, jseq, jbnab2ch, &
!$omp nabeg, nbbeg, ncbeg, i_in_prim, icad, naaddr, &
!$omp nbaddr, ncaddr, n1, n2, n3, nd1_1st, nd2_1st)
! Loop over atoms k in current A-halo partn
do k = 1, ahalo%nh_part(kpart)
k_in_halo = ahalo%j_beg(kpart) + k - 1
Expand Down Expand Up @@ -257,7 +249,6 @@ subroutine m_kern_max(k_off, kpart, ib_nd_acc, ibaddr, nbnab, &
end if ! End of if(j_in_halo.ne.0)
end do ! End of j = 1, nbnab
end do ! End of i = 1, at%n_hnab
!$omp end do
end do ! End of k = 1, nahpart
!$omp end parallel
return
Expand Down Expand Up @@ -405,14 +396,6 @@ subroutine m_kern_min(k_off, kpart, ib_nd_acc, ibaddr, nbnab, &
! For OpenMP
integer :: nd1_1st(at%mx_halo), nd2_1st(mx_absb)

!$omp parallel default(none) &
!$omp shared(kpart, ibaddr, ib_nd_acc, nbnab, ibpart, ibseq, &
!$omp k_off, bndim2, mx_absb, mx_part, at, ahalo, chalo, &
!$omp a, b, c) &
!$omp private(j_in_halo, k_in_halo, k_in_part, nbkbeg, &
!$omp nb_nd_kbeg, nd1, nd2, nd3, i, j, k, jpart, jseq, &
!$omp jbnab2ch, icad, nabeg, nbbeg, ncbeg, naaddr, nbaddr, &
!$omp ncaddr, n1, n2, n3, i_in_prim, nd1_1st, nd2_1st)
! Loop over atoms k in current A-halo partn
do k = 1, ahalo%nh_part(kpart)
k_in_halo = ahalo%j_beg(kpart) + k - 1
Expand Down Expand Up @@ -472,7 +455,6 @@ subroutine m_kern_min(k_off, kpart, ib_nd_acc, ibaddr, nbnab, &
end do
!$omp end do
end do
!$omp end parallel
return
end subroutine m_kern_min
!!*****
Expand Down
22 changes: 2 additions & 20 deletions src/multiply_kernel_ompDoik.f90
Original file line number Diff line number Diff line change
Expand Up @@ -184,14 +184,6 @@ subroutine m_kern_max(k_off, kpart, ib_nd_acc, ibaddr, nbnab, &
! OpenMP required indexing variables
integer :: nd1_1st(at%mx_halo), nd2_1st(mx_absb)

!$omp parallel default(none) &
!$omp shared(kpart, ibaddr, ib_nd_acc, nbnab, ibpart, ibseq, &
!$omp k_off, bndim2, mx_absb, mx_part, at, ahalo, chalo, &
!$omp a, b, c) &
!$omp private(i, j, k, j_in_halo, k_in_halo, k_in_part, nbkbeg, &
!$omp nb_nd_kbeg, nd1, nd2, nd3, jpart, jseq, jbnab2ch, &
!$omp nabeg, nbbeg, ncbeg, i_in_prim, icad, naaddr, &
!$omp nbaddr, ncaddr, n1, n2, n3, nd1_1st, nd2_1st)
! Loop over atoms k in current A-halo partn
do k = 1, ahalo%nh_part(kpart)
k_in_halo = ahalo%j_beg(kpart) + k - 1
Expand All @@ -218,7 +210,7 @@ subroutine m_kern_max(k_off, kpart, ib_nd_acc, ibaddr, nbnab, &
jseq = ibseq(nbkbeg+j-1)
jbnab2ch(j) = chalo%i_halo(chalo%i_hbeg(jpart)+jseq-1)
end do
!$omp do schedule(runtime)
!$omp do schedule(runtime)
! Loop over primary-set A-neighbours of k
do i = 1, at%n_hnab(k_in_halo)
! nabeg = at%i_beg(k_in_halo) + i - 1
Expand Down Expand Up @@ -259,7 +251,6 @@ subroutine m_kern_max(k_off, kpart, ib_nd_acc, ibaddr, nbnab, &
end do ! End of i = 1, at%n_hnab
!$omp end do
end do ! End of k = 1, nahpart
!$omp end parallel
return
end subroutine m_kern_max
!!*****
Expand Down Expand Up @@ -405,16 +396,8 @@ subroutine m_kern_min(k_off, kpart, ib_nd_acc, ibaddr, nbnab, &
! For OpenMP
integer :: nd1_1st(at%mx_halo), nd2_1st(mx_absb)

!$omp parallel default(none) &
!$omp shared(kpart, ibaddr, ib_nd_acc, nbnab, ibpart, ibseq, &
!$omp k_off, bndim2, mx_absb, mx_part, at, ahalo, chalo, &
!$omp a, b, c) &
!$omp private(j_in_halo, k_in_halo, k_in_part, nbkbeg, &
!$omp nb_nd_kbeg, nd1, nd2, nd3, i, j, k, jpart, jseq, &
!$omp jbnab2ch, icad, nabeg, nbbeg, ncbeg, naaddr, nbaddr, &
!$omp ncaddr, n1, n2, n3, i_in_prim, nd1_1st, nd2_1st)
! Loop over atoms k in current A-halo partn
!$omp do schedule(runtime)
!$omp do schedule(runtime)
do k = 1, ahalo%nh_part(kpart)
k_in_halo = ahalo%j_beg(kpart) + k - 1
k_in_part = ahalo%j_seq(k_in_halo)
Expand Down Expand Up @@ -472,7 +455,6 @@ subroutine m_kern_min(k_off, kpart, ib_nd_acc, ibaddr, nbnab, &
end do
end do
!$omp end do
!$omp end parallel
return
end subroutine m_kern_min
!!*****
Expand Down
18 changes: 0 additions & 18 deletions src/multiply_kernel_ompDojk.f90
Original file line number Diff line number Diff line change
Expand Up @@ -184,14 +184,6 @@ subroutine m_kern_max(k_off, kpart, ib_nd_acc, ibaddr, nbnab, &
! OpenMP required indexing variables
integer :: nd1_1st(at%mx_halo), nd2_1st(mx_absb)

!$omp parallel default(none) &
!$omp shared(kpart, ibaddr, ib_nd_acc, nbnab, ibpart, ibseq, &
!$omp k_off, bndim2, mx_absb, mx_part, at, ahalo, chalo, &
!$omp a, b, c) &
!$omp private(i, j, k, j_in_halo, k_in_halo, k_in_part, nbkbeg, &
!$omp nb_nd_kbeg, nd1, nd2, nd3, jpart, jseq, jbnab2ch, &
!$omp nabeg, nbbeg, ncbeg, i_in_prim, icad, naaddr, &
!$omp nbaddr, ncaddr, n1, n2, n3, nd1_1st, nd2_1st)
! Loop over atoms k in current A-halo partn
do k = 1, ahalo%nh_part(kpart)
k_in_halo = ahalo%j_beg(kpart) + k - 1
Expand Down Expand Up @@ -259,7 +251,6 @@ subroutine m_kern_max(k_off, kpart, ib_nd_acc, ibaddr, nbnab, &
!$omp end do
end do ! End of i = 1, at%n_hnab
end do ! End of k = 1, nahpart
!$omp end parallel
return
end subroutine m_kern_max
!!*****
Expand Down Expand Up @@ -405,14 +396,6 @@ subroutine m_kern_min(k_off, kpart, ib_nd_acc, ibaddr, nbnab, &
! For OpenMP
integer :: nd1_1st(at%mx_halo), nd2_1st(mx_absb)

!$omp parallel default(none) &
!$omp shared(kpart, ibaddr, ib_nd_acc, nbnab, ibpart, ibseq, &
!$omp k_off, bndim2, mx_absb, mx_part, at, ahalo, chalo, &
!$omp a, b, c) &
!$omp private(j_in_halo, k_in_halo, k_in_part, nbkbeg, &
!$omp nb_nd_kbeg, nd1, nd2, nd3, i, j, k, jpart, jseq, &
!$omp jbnab2ch, icad, nabeg, nbbeg, ncbeg, naaddr, nbaddr, &
!$omp ncaddr, n1, n2, n3, i_in_prim, nd1_1st, nd2_1st)
! Loop over atoms k in current A-halo partn
!$omp do schedule(runtime)
do k = 1, ahalo%nh_part(kpart)
Expand Down Expand Up @@ -472,7 +455,6 @@ subroutine m_kern_min(k_off, kpart, ib_nd_acc, ibaddr, nbnab, &
end do
end do
!$omp end do
!$omp end parallel
return
end subroutine m_kern_min
!!*****
Expand Down
57 changes: 6 additions & 51 deletions src/multiply_kernel_ompGemm.f90
Original file line number Diff line number Diff line change
Expand Up @@ -168,23 +168,13 @@ subroutine m_kern_max(k_off, kpart, ib_nd_acc, ibaddr, nbnab, &
! OpenMP required indexing variables
integer :: nd1_1st(at%mx_halo), nd2_1st(mx_absb)

!$omp parallel default(none) &
!$omp shared(kpart, ibaddr, ib_nd_acc, nbnab, ibpart, ibseq, &
!$omp k_off, bndim2, mx_absb, mx_part, at, ahalo, chalo, &
!$omp a, b, c) &
!$omp private(i, j, k, j_in_halo, k_in_halo, k_in_part, nbkbeg, &
!$omp nb_nd_kbeg, nd1, nd2, nd3, jpart, jseq, jbnab2ch, &
!$omp nabeg, nbbeg, ncbeg, i_in_prim, icad, naaddr, &
!$omp nbaddr, ncaddr, n1, n2, n3, nd1_1st, nd2_1st, &
!$omp tempa, tempb, tempc, prend1, maxlen, sofar)
allocate(tempa(1,1), tempc(1,1))
do k = 1, ahalo%nh_part(kpart) ! Loop over atoms k in current A-halo partn
k_in_halo = ahalo%j_beg(kpart) + k - 1
k_in_part = ahalo%j_seq(k_in_halo)
nbkbeg = ibaddr(k_in_part)
nb_nd_kbeg = ib_nd_acc(k_in_part)
nd3 = ahalo%ndimj(k_in_halo)
! if (PRESENT(debug)) write (21+debug,*) 'Details1: ', k, nb_nd_kbeg
! for OpenMP sub-array indexing
nd1_1st(1) = 0
do i = 2, at%n_hnab(k_in_halo)
Expand All @@ -207,15 +197,15 @@ subroutine m_kern_max(k_off, kpart, ib_nd_acc, ibaddr, nbnab, &
prend1 = 0
!$omp do schedule(runtime)
! Loop over primary-set A-neighbours of k
do i = 1, at%n_hnab(k_in_halo)
! nabeg = at%i_beg(k_in_halo) + i - 1
A_i : do i = 1, at%n_hnab(k_in_halo)
i_in_prim = at%i_prim(at%i_beg(k_in_halo)+i-1)
nd1 = ahalo%ndimi(i_in_prim)
nabeg = at%i_nd_beg(k_in_halo) + nd1_1st(i)
if (nd1 /= prend1) then
deallocate(tempc, tempa)
allocate(tempa(nd1,nd3), tempc(nd1,maxlen))
! allocate(tempa(nd3,nd1), tempc(nd1,maxlen))
deallocate(tempc)
deallocate(tempa)
allocate(tempa(nd1,nd3))
allocate(tempc(nd1,maxlen))
end if
tempa = zero
tempb = zero
Expand All @@ -224,38 +214,17 @@ subroutine m_kern_max(k_off, kpart, ib_nd_acc, ibaddr, nbnab, &
naaddr = nabeg + nd3 * (n1 - 1)
do n3 = 1, nd3
tempa(n1,n3) = a(naaddr+n3-1)
! tempa(n3,n1) = a(naaddr+n3-1)
end do
end do
icad = (i_in_prim - 1) * chalo%ni_in_halo
! nbbeg = nb_nd_kbeg
sofar = 0
do j = 1, nbnab(k_in_part) ! Loop over B-neighbours of atom k
! nbbeg = nbkbeg + j - 1
nd2 = bndim2(nbkbeg+j-1)
nbbeg = nb_nd_kbeg + nd2_1st(j)
j_in_halo = jbnab2ch(j)
if (j_in_halo /= 0) then
ncbeg = chalo%i_h2d(icad+j_in_halo)
! nd2 = chalo%ndimj(j_in_halo)
if (ncbeg /= 0) then ! multiplication of ndim x ndim blocks
! if (present(debug)) &
! write (21+debug,*) 'Details2: ', j, nd2, &
! (nabeg-1)/(nd1*nd3), &
! (ncbeg-1)/(nd1*nd2), &
! (nbbeg-1)/(nd2*nd3)
!DIR$ NOPATTERN
!! do n2=1, nd2
!! nbaddr = nbbeg+nd3*(n2-1)
!! ncaddr = ncbeg+nd1*(n2-1)
!! do n1=1, nd1
!! naaddr=nabeg+nd3*(n1-1)
!! do n3=1, nd3
!! c(ncaddr+n1-1) = c(ncaddr+n1-1) &
!! +a(naaddr+n3-1)*b(nbaddr+n3-1)
!! end do
!! end do
!! end do
do n2 = 1, nd2
nbaddr = nbbeg + nd3 * (n2 - 1)
do n3 = 1, nd3
Expand All @@ -267,9 +236,6 @@ subroutine m_kern_max(k_off, kpart, ib_nd_acc, ibaddr, nbnab, &
end if ! End of if (j_in_halo /= 0)
end do ! End of 1, nbnab
if (sofar > 0) then
! m, n, k, alpha, a, lda, b, ldb, beta, c, ldc
! call dgemm('t', 'n', nd1, sofar, nd3, 1.0_double, tempa, &
! nd3, tempb, nd3,0.0_double, tempc, nd1)
call dgemm('n', 'n', nd1, sofar, nd3, 1.0_double, tempa, &
nd1, tempb, nd3, zero, tempc, nd1)
end if
Expand All @@ -291,13 +257,12 @@ subroutine m_kern_max(k_off, kpart, ib_nd_acc, ibaddr, nbnab, &
end if
end if
end do
end do ! end of i = 1, at%n_hnab
end do A_i
!$omp end do
deallocate(tempb)
end do ! end of k = 1, nahpart
if (allocated(tempa)) deallocate(tempa)
if (allocated(tempc)) deallocate(tempc)
!$omp end parallel
return
end subroutine m_kern_max
!!*****
Expand Down Expand Up @@ -445,15 +410,6 @@ subroutine m_kern_min(k_off, kpart, ib_nd_acc, ibaddr, nbnab, &
! mx_a = maxnsf
! maxlen = maxnsf * max(nbnab)

!$omp parallel default(none) &
!$omp shared(kpart, ibaddr, ib_nd_acc, nbnab, ibpart, ibseq, &
!$omp k_off, bndim2, mx_absb, mx_part, at, ahalo, chalo, &
!$omp a, b, c) &
!$omp private(i, j, k, j_in_halo, k_in_halo, k_in_part, nbkbeg, &
!$omp nb_nd_kbeg, nd1, nd2, nd3, jpart, jseq, jbnab2ch, &
!$omp nabeg, nbbeg, ncbeg, i_in_prim, icad, naaddr, &
!$omp nbaddr, ncaddr, n1, n2, n3, nd1_1st, nd2_1st, &
!$omp tempb, tempc, maxlen, sofar)
do k = 1, ahalo%nh_part(kpart) ! Loop over atoms k in current A-halo partn
k_in_halo = ahalo%j_beg(kpart) + k - 1
k_in_part = ahalo%j_seq(k_in_halo)
Expand Down Expand Up @@ -532,7 +488,6 @@ subroutine m_kern_min(k_off, kpart, ib_nd_acc, ibaddr, nbnab, &
end do
!$omp end do
end do
!$omp end parallel
return
end subroutine m_kern_min
!!*****
Expand Down
23 changes: 1 addition & 22 deletions src/multiply_kernel_ompGemm_m.f90
Original file line number Diff line number Diff line change
Expand Up @@ -168,16 +168,7 @@ subroutine m_kern_max(k_off, kpart, ib_nd_acc, ibaddr, nbnab, &
! OpenMP required indexing variables
integer :: nd1_1st(at%mx_halo), nd2_1st(mx_absb)

!$omp parallel default(none) &
!$omp shared(kpart, ibaddr, ib_nd_acc, nbnab, ibpart, ibseq, &
!$omp k_off, bndim2, mx_absb, mx_part, at, ahalo, chalo, &
!$omp a, b, c) &
!$omp private(i, j, k, j_in_halo, k_in_halo, k_in_part, nbkbeg, &
!$omp nb_nd_kbeg, nd1, nd2, nd3, jpart, jseq, jbnab2ch, &
!$omp nabeg, nbbeg, ncbeg, i_in_prim, icad, naaddr, &
!$omp nbaddr, ncaddr, n1, n2, n3, nd1_1st, nd2_1st, &
!$omp tempa, tempb, tempc, maxnd1, maxnd2, maxnd3, &
!$omp maxlen, sofar)
! Allocate tempa, tempb, tempc to largest possible size outside the loop
maxnd1 = maxval(ahalo%ndimi)
maxnd2 = maxval(bndim2)
maxnd3 = maxval(ahalo%ndimj)
Expand Down Expand Up @@ -276,7 +267,6 @@ subroutine m_kern_max(k_off, kpart, ib_nd_acc, ibaddr, nbnab, &
!$omp end do
end do ! end of k = 1, nahpart
deallocate(tempa, tempb, tempc)
!$omp end parallel
return
end subroutine m_kern_max
!!*****
Expand Down Expand Up @@ -420,16 +410,6 @@ subroutine m_kern_min(k_off, kpart, ib_nd_acc, ibaddr, nbnab, &
! OpenMP required indexing variables
integer :: nd1_1st(at%mx_halo), nd2_1st(mx_absb)

!$omp parallel default(none) &
!$omp shared(kpart, ibaddr, ib_nd_acc, nbnab, ibpart, ibseq, &
!$omp k_off, bndim2, mx_absb, mx_part, at, ahalo, chalo, &
!$omp a, b, c) &
!$omp private(i, j, k, j_in_halo, k_in_halo, k_in_part, nbkbeg, &
!$omp nb_nd_kbeg, nd1, nd2, nd3, jpart, jseq, jbnab2ch, &
!$omp nabeg, nbbeg, ncbeg, i_in_prim, icad, naaddr, &
!$omp nbaddr, ncaddr, n1, n2, n3, nd1_1st, nd2_1st, &
!$omp tempb, tempc, maxnd1, maxnd2, maxnd3, maxlen, &
!$omp sofar)
maxnd1 = maxval(ahalo%ndimi)
maxnd2 = maxval(bndim2)
maxnd3 = maxval(ahalo%ndimj)
Expand Down Expand Up @@ -503,7 +483,6 @@ subroutine m_kern_min(k_off, kpart, ib_nd_acc, ibaddr, nbnab, &
!$omp end do
end do
deallocate(tempb, tempc)
!$omp end parallel
return
end subroutine m_kern_min
!!*****
Expand Down
Loading