From 830a1ab0085acf154f15a2732d5c809e1256a47d Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Tue, 3 Mar 2020 11:26:29 -0800 Subject: [PATCH 01/16] Localized OpenMP threading in mode_forward Move MPAS-O OpenMP thread parallel regions to be local to the loops in mode_forward/ instead of instantiating the parallel region around the timestep loop. --- .../mode_forward/mpas_ocn_forward_mode.F | 4 - .../mode_forward/mpas_ocn_time_integration.F | 1 + .../mpas_ocn_time_integration_rk4.F | 87 ++++++++--- .../mpas_ocn_time_integration_split.F | 141 +++++++++++++----- 4 files changed, 172 insertions(+), 61 deletions(-) diff --git a/src/core_ocean/mode_forward/mpas_ocn_forward_mode.F b/src/core_ocean/mode_forward/mpas_ocn_forward_mode.F index 3bb95844b2..6dfa8810e4 100644 --- a/src/core_ocean/mode_forward/mpas_ocn_forward_mode.F +++ b/src/core_ocean/mode_forward/mpas_ocn_forward_mode.F @@ -588,12 +588,8 @@ function ocn_forward_mode_run(domain) result(ierr)!{{{ #ifdef MPAS_DEBUG call mpas_log_write( ' Computing forward time step') #endif - !$omp parallel default(firstprivate) shared(domain, dt, timeStamp) - call ocn_timestep(domain, dt, timeStamp) - !$omp end parallel - call mpas_timer_stop("time integration") ! Move time level 2 fields back into time level 1 for next time step diff --git a/src/core_ocean/mode_forward/mpas_ocn_time_integration.F b/src/core_ocean/mode_forward/mpas_ocn_time_integration.F index 606aeb9f64..4fd7a40ac8 100644 --- a/src/core_ocean/mode_forward/mpas_ocn_time_integration.F +++ b/src/core_ocean/mode_forward/mpas_ocn_time_integration.F @@ -127,6 +127,7 @@ subroutine ocn_timestep(domain, dt, timeStamp)!{{{ call mpas_set_time(xtime_timeType, dateTimeString=xtime) call mpas_set_time(simulationStartTime_timeType, dateTimeString=simulationStartTime) call mpas_get_timeInterval(xtime_timeType - simulationStartTime_timeType,dt=daysSinceStartOfSim) + daysSinceStartOfSim = daysSinceStartOfSim*days_per_second block => block % next diff --git a/src/core_ocean/mode_forward/mpas_ocn_time_integration_rk4.F b/src/core_ocean/mode_forward/mpas_ocn_time_integration_rk4.F index 0f61b5eccf..553f0ba8f7 100644 --- a/src/core_ocean/mode_forward/mpas_ocn_time_integration_rk4.F +++ b/src/core_ocean/mode_forward/mpas_ocn_time_integration_rk4.F @@ -252,6 +252,7 @@ subroutine ocn_time_integrator_rk4(domain, dt)!{{{ call mpas_pool_get_array(meshPool, 'maxLevelCell', maxLevelCell) call mpas_pool_get_array(meshPool, 'maxLevelEdgeTop', maxLevelEdgeTop) + !$omp parallel !$omp do schedule(runtime) private(k) do iEdge = 1, nEdges do k = 1, maxLevelEdgeTop(iEdge) @@ -267,6 +268,7 @@ subroutine ocn_time_integrator_rk4(domain, dt)!{{{ end do end do !$omp end do + !$omp end parallel call mpas_pool_begin_iteration(tracersPool) do while ( mpas_pool_get_next_member(tracersPool, groupItr) ) @@ -277,6 +279,7 @@ subroutine ocn_time_integrator_rk4(domain, dt)!{{{ call mpas_pool_get_array(tracersPool, trim(groupItr % memberName), tracersNew, 2) if ( associated(tracersCur) .and. associated(tracersNew) ) then + !$omp parallel !$omp do schedule(runtime) private(k) do iCell = 1, nCells ! couple tracers to thickness do k = 1, maxLevelCell(iCell) @@ -284,24 +287,29 @@ subroutine ocn_time_integrator_rk4(domain, dt)!{{{ end do end do !$omp end do + !$omp end parallel end if end if end do if (associated(highFreqThicknessCur)) then + !$omp parallel !$omp do schedule(runtime) do iCell = 1, nCells highFreqThicknessNew(:, iCell) = highFreqThicknessCur(:, iCell) end do !$omp end do + !$omp end parallel end if if (associated(lowFreqDivergenceCur)) then - !$omp do schedule(runtime) + !$omp parallel + !$omp do schedule(runtime) private(iCell) do iCell = 1, nCells lowFreqDivergenceNew(:, iCell) = lowFreqDivergenceCur(:, iCell) end do !$omp end do + !$omp end parallel end if block => block % next @@ -453,12 +461,14 @@ subroutine ocn_time_integrator_rk4(domain, dt)!{{{ call mpas_pool_get_dimension(block % dimensions, 'nCells', nCells) - !$omp do schedule(runtime) + !$omp parallel + !$omp do schedule(runtime) private(iCell) do iCell = 1, nCells highFreqThicknessProvis(:, iCell) = highFreqThicknessCur(:, iCell) + rk_substep_weights(rk_step) & * highFreqThicknessTend(:, iCell) end do !$omp end do + !$omp end parallel block => block % next end do @@ -675,19 +685,23 @@ subroutine ocn_time_integrator_rk4(domain, dt)!{{{ if (config_prescribe_velocity) then - !$omp do schedule(runtime) + !$omp parallel + !$omp do schedule(runtime) private(iEdge) do iEdge = 1, nEdges normalVelocityNew(:, iEdge) = normalVelocityCur(:, iEdge) end do !$omp end do + !$omp end parallel end if if (config_prescribe_thickness) then - !$omp do schedule(runtime) + !$omp parallel + !$omp do schedule(runtime) private(iCell) do iCell = 1, nCells layerThicknessNew(:, iCell) = layerThicknessCur(:, iCell) end do !$omp end do + !$omp end parallel end if ! direct application of tidal boundary condition @@ -719,11 +733,13 @@ subroutine ocn_time_integrator_rk4(domain, dt)!{{{ ! ------------------------------------------------------------------ ! Accumulating various parameterizations of the transport velocity ! ------------------------------------------------------------------ - !$omp do schedule(runtime) + !$omp parallel + !$omp do schedule(runtime) private(iEdge) do iEdge = 1, nEdges normalTransportVelocity(:, iEdge) = normalVelocityNew(:, iEdge) end do !$omp end do + !$omp end parallel ! Compute normalGMBolusVelocity and the tracer transport velocity if (config_use_GM) then @@ -733,11 +749,13 @@ subroutine ocn_time_integrator_rk4(domain, dt)!{{{ call mpas_threading_barrier() if (config_use_GM) then - !$omp do schedule(runtime) + !$omp parallel + !$omp do schedule(runtime) private(iEdge) do iEdge = 1, nEdges normalTransportVelocity(:, iEdge) = normalTransportVelocity(:, iEdge) + normalGMBolusVelocity(:, iEdge) end do !$omp end do + !$omp end parallel end if ! ------------------------------------------------------------------ ! End: Accumulating various parameterizations of the transport velocity @@ -754,7 +772,8 @@ subroutine ocn_time_integrator_rk4(domain, dt)!{{{ includeHalos = .true.) call mpas_threading_barrier() - !$omp do schedule(runtime) + !$omp parallel + !$omp do schedule(runtime) private(iCell) do iCell = 1, nCells surfaceVelocity(indexSurfaceVelocityZonal, iCell) = velocityZonal(1, iCell) surfaceVelocity(indexSurfaceVelocityMeridional, iCell) = velocityMeridional(1, iCell) @@ -763,6 +782,7 @@ subroutine ocn_time_integrator_rk4(domain, dt)!{{{ SSHGradient(indexSSHGradientMeridional, iCell) = gradSSHMeridional(iCell) end do !$omp end do + !$omp end parallel call ocn_time_average_coupled_accumulate(diagnosticsPool, statePool, forcingPool, 2) @@ -1163,7 +1183,8 @@ subroutine ocn_time_integrator_rk4_diagnostic_update(block, dt, rkSubstepWeight, call mpas_pool_get_array(diagnosticsPool, 'wettingVelocity', wettingVelocity) call mpas_threading_barrier() - !$omp do schedule(runtime) private(k) + !$omp parallel + !$omp do schedule(runtime) private(k, iEdge) do iEdge = 1, nEdges do k = 1, maxLevelEdgeTop(iEdge) normalVelocityProvis(k, iEdge) = normalVelocityCur(k, iEdge) + rkSubstepWeight & @@ -1173,8 +1194,7 @@ subroutine ocn_time_integrator_rk4_diagnostic_update(block, dt, rkSubstepWeight, end do !$omp end do - - !$omp do schedule(runtime) private(k) + !$omp do schedule(runtime) private(k, iCell) do iCell = 1, nCells do k = 1, maxLevelCell(iCell) layerThicknessProvis(k, iCell) = layerThicknessCur(k, iCell) + rkSubstepWeight & @@ -1182,6 +1202,7 @@ subroutine ocn_time_integrator_rk4_diagnostic_update(block, dt, rkSubstepWeight, end do end do !$omp end do + !$omp end parallel call mpas_pool_begin_iteration(tracersPool) do while ( mpas_pool_get_next_member(tracersPool, groupItr) ) @@ -1196,7 +1217,8 @@ subroutine ocn_time_integrator_rk4_diagnostic_update(block, dt, rkSubstepWeight, modifiedGroupName = trim(groupItr % memberName) // 'Tend' call mpas_pool_get_array(tracersTendPool, modifiedGroupName, tracersGroupTend) if ( associated(tracersGroupProvis) .and. associated(tracersGroupCur) .and. associated(tracersGroupTend) ) then - !$omp do schedule(runtime) private(k) + !$omp parallel + !$omp do schedule(runtime) private(k, iCell) do iCell = 1, nCells do k = 1, maxLevelCell(iCell) tracersGroupProvis(:, k, iCell) = ( layerThicknessCur(k, iCell) * tracersGroupCur(:, k, iCell) & @@ -1206,34 +1228,41 @@ subroutine ocn_time_integrator_rk4_diagnostic_update(block, dt, rkSubstepWeight, end do !$omp end do + !$omp end parallel end if end if end if end do if (associated(lowFreqDivergenceCur)) then - !$omp do schedule(runtime) + !$omp parallel + !$omp do schedule(runtime) private(iCell) do iCell = 1, nCells lowFreqDivergenceProvis(:, iCell) = lowFreqDivergenceCur(:, iCell) + rkSubstepWeight & * lowFreqDivergenceTend(:, iCell) end do !$omp end do + !$omp end parallel end if if (config_prescribe_velocity) then - !$omp do schedule(runtime) + !$omp parallel + !$omp do schedule(runtime) private(iEdge) do iEdge = 1, nEdges normalVelocityProvis(:, iEdge) = normalVelocityCur(:, iEdge) end do !$omp end do + !$omp end parallel end if if (config_prescribe_thickness) then - !$omp do schedule(runtime) + !$omp parallel + !$omp do schedule(runtime) private(iCell) do iCell = 1, nCells layerThicknessProvis(:, iCell) = layerThicknessCur(:, iCell) end do !$omp end do + !$omp end parallel end if call mpas_threading_barrier() @@ -1243,11 +1272,13 @@ subroutine ocn_time_integrator_rk4_diagnostic_update(block, dt, rkSubstepWeight, ! ------------------------------------------------------------------ ! Accumulating various parametrizations of the transport velocity ! ------------------------------------------------------------------ - !$omp do schedule(runtime) + !$omp parallel + !$omp do schedule(runtime) private(iEdge) do iEdge = 1, nEdges normalTransportVelocity(:, iEdge) = normalVelocityProvis(:, iEdge) end do !$omp end do + !$omp end parallel ! Compute normalGMBolusVelocity, relativeSlope and RediDiffVertCoef if respective flags are turned on if (config_use_GM) then @@ -1257,11 +1288,13 @@ subroutine ocn_time_integrator_rk4_diagnostic_update(block, dt, rkSubstepWeight, call mpas_threading_barrier() if (config_use_GM) then - !$omp do schedule(runtime) + !$omp parallel + !$omp do schedule(runtime) private(iEdge) do iEdge = 1, nEdges normalTransportVelocity(:, iEdge) = normalTransportVelocity(:, iEdge) + normalGMBolusVelocity(:,iEdge) end do !$omp end do + !$omp end parallel end if ! ------------------------------------------------------------------ ! End: Accumulating various parametrizations of the transport velocity @@ -1327,7 +1360,8 @@ subroutine ocn_time_integrator_rk4_accumulate_update(block, rkWeight, err)!{{{ call mpas_pool_get_array(meshPool, 'maxLevelCell', maxLevelCell) - !$omp do schedule(runtime) private(k) + !$omp parallel + !$omp do schedule(runtime) private(k, iCell) do iCell = 1, nCells do k = 1, maxLevelCell(iCell) layerThicknessNew(k, iCell) = layerThicknessNew(k, iCell) + rkWeight * layerThicknessTend(k, iCell) @@ -1335,12 +1369,13 @@ subroutine ocn_time_integrator_rk4_accumulate_update(block, rkWeight, err)!{{{ end do !$omp end do - !$omp do schedule(runtime) + !$omp do schedule(runtime) private(iEdge) do iEdge = 1, nEdges normalVelocityNew(:, iEdge) = normalVelocityNew(:, iEdge) + rkWeight * normalVelocityTend(:, iEdge) normalVelocityNew(:, iEdge) = normalVelocityNew(:, iEdge) * (1.0_RKIND - wettingVelocity(:, iEdge)) end do !$omp end do + !$omp end parallel call mpas_pool_begin_iteration(tracersPool) do while ( mpas_pool_get_next_member(tracersPool, groupItr) ) @@ -1354,7 +1389,8 @@ subroutine ocn_time_integrator_rk4_accumulate_update(block, rkWeight, err)!{{{ modifiedGroupName = trim(groupItr % memberName) // 'Tend' call mpas_pool_get_array(tracersTendPool, modifiedGroupName, tracersGroupTend) if ( associated(tracersGroupNew) .and. associated(tracersGroupTend) ) then - !$omp do schedule(runtime) private(k) + !$omp parallel + !$omp do schedule(runtime) private(k, iCell) do iCell = 1, nCells do k = 1, maxLevelCell(iCell) tracersGroupNew(:, k, iCell) = tracersGroupNew(:, k, iCell) + rkWeight & @@ -1362,25 +1398,30 @@ subroutine ocn_time_integrator_rk4_accumulate_update(block, rkWeight, err)!{{{ end do end do !$omp end do + !$omp end parallel end if end if end if end do if (associated(highFreqThicknessNew)) then - !$omp do schedule(runtime) + !$omp parallel + !$omp do schedule(runtime) private(iCell) do iCell = 1, nCells highFreqThicknessNew(:, iCell) = highFreqThicknessNew(:, iCell) + rkWeight * highFreqThicknessTend(:, iCell) end do !$omp end do + !$omp end parallel end if if (associated(lowFreqDivergenceNew)) then - !$omp do schedule(runtime) + !$omp parallel + !$omp do schedule(runtime) private(iCell) do iCell = 1, nCells lowFreqDivergenceNew(:, iCell) = lowFreqDivergenceNew(:, iCell) + rkWeight * lowFreqDivergenceTend(:, iCell) end do !$omp end do + !$omp end parallel end if end subroutine ocn_time_integrator_rk4_accumulate_update!}}} @@ -1441,13 +1482,15 @@ subroutine ocn_time_integrator_rk4_cleanup(block, dt, err)!{{{ if ( groupItr % memberType == MPAS_POOL_FIELD ) then call mpas_pool_get_array(tracersPool, groupItr % memberName, tracersGroupNew, 2) if ( associated(tracersGroupNew) ) then - !$omp do schedule(runtime) private(k) + !$omp parallel + !$omp do schedule(runtime) private(k, iCell) do iCell = 1, nCells do k = 1, maxLevelCell(iCell) tracersGroupNew(:, k, iCell) = tracersGroupNew(:, k, iCell) / layerThicknessNew(k, iCell) end do end do !$omp end do + !$omp end parallel end if end if end do diff --git a/src/core_ocean/mode_forward/mpas_ocn_time_integration_split.F b/src/core_ocean/mode_forward/mpas_ocn_time_integration_split.F index add39d4c73..1354d1a958 100644 --- a/src/core_ocean/mode_forward/mpas_ocn_time_integration_split.F +++ b/src/core_ocean/mode_forward/mpas_ocn_time_integration_split.F @@ -322,7 +322,8 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ ! Initialize * variables that are used to compute baroclinic tendencies below. - !$omp do schedule(runtime) private(k) + !$omp parallel + !$omp do schedule(runtime) private(k, iEdge) do iEdge = 1, nEdges do k = 1, nVertLevels !maxLevelEdgeTop % array(iEdge) @@ -341,7 +342,7 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ end do !$omp end do - !$omp do schedule(runtime) private(k) + !$omp do schedule(runtime) private(k, iCell) do iCell = 1, nCells sshNew(iCell) = sshCur(iCell) do k = 1, maxLevelCell(iCell) @@ -351,6 +352,7 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ end do end do !$omp end do + !$omp end parallel call mpas_pool_begin_iteration(tracersPool) do while ( mpas_pool_get_next_member(tracersPool, groupItr)) @@ -359,32 +361,38 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ call mpas_pool_get_array(tracersPool, groupItr % memberName, tracersGroupNew, 2) if ( associated(tracersGroupCur) .and. associated(tracersGroupNew) ) then - !$omp do schedule(runtime) private(k) + !$omp parallel + !$omp do schedule(runtime) private(k, iCell) do iCell = 1, nCells do k = 1, maxLevelCell(iCell) tracersGroupNew(:,k,iCell) = tracersGroupCur(:,k,iCell) end do end do !$omp end do + !$omp end parallel end if end if end do if (associated(highFreqThicknessNew)) then - !$omp do schedule(runtime) + !$omp parallel + !$omp do schedule(runtime) private(iCell) do iCell = 1, nCells highFreqThicknessNew(:, iCell) = highFreqThicknessCur(:, iCell) end do !$omp end do + !$omp end parallel end if if (associated(lowFreqDivergenceNew)) then - !$omp do schedule(runtime) + !$omp parallel + !$omp do schedule(runtime) private(iCell) do iCell = 1, nCells lowFreqDivergenceNew(:, iCell) = lowFreqDivergenceCur(:, iCell) end do !$omp end do + !$omp end parallel endif block => block % next @@ -477,7 +485,8 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ nCells = nCellsPtr - !$omp do schedule(runtime) private(k) + !$omp parallel + !$omp do schedule(runtime) private(k, iCell) do iCell = 1, nCells do k = 1, maxLevelCell(iCell) ! this is h^{hf}_{n+1} @@ -485,6 +494,7 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ end do end do !$omp end do + !$omp end parallel block => block % next end do @@ -569,7 +579,9 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ allocate(uTemp(nVertLevels)) - !$omp do schedule(runtime) + !$omp parallel + !$omp do schedule(runtime) & + !$omp private(iEdge, cell1, cell2, uTemp, k, normalThicknessFluxSum, thicknessSum) do iEdge = 1, nEdges cell1 = cellsOnEdge(1,iEdge) cell2 = cellsOnEdge(2,iEdge) @@ -612,6 +624,7 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ enddo ! iEdge !$omp end do + !$omp end parallel deallocate(uTemp) @@ -674,7 +687,8 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ ! For Split_Explicit unsplit, simply set normalBarotropicVelocityNew=0, normalBarotropicVelocitySubcycle=0, and ! uNew=normalBaroclinicVelocityNew - !$omp do schedule(runtime) private(k) + !$omp parallel + !$omp do schedule(runtime) private(k, iEdge) do iEdge = 1, nEdges normalBarotropicVelocityNew(iEdge) = 0.0_RKIND do k = 1, nVertLevels @@ -689,6 +703,7 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ enddo end do ! iEdge !$omp end do + !$omp end parallel block => block % next end do ! block @@ -723,21 +738,24 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ nEdges = nEdgesPtr if (config_filter_btr_mode) then - !$omp do schedule(runtime) + !$omp parallel + !$omp do schedule(runtime) private(iEdge) do iEdge = 1, nEdges barotropicForcing(iEdge) = 0.0_RKIND end do !$omp end do + !$omp end parallel endif - !$omp do schedule(runtime) + !$omp parallel + !$omp do schedule(runtime) private(iCell) do iCell = 1, nCells ! sshSubcycleOld = sshOld sshSubcycleCur(iCell) = sshCur(iCell) end do !$omp end do - !$omp do schedule(runtime) + !$omp do schedule(runtime) private(iEdge) do iEdge = 1, nEdges ! normalBarotropicVelocitySubcycleOld = normalBarotropicVelocityOld @@ -750,6 +768,7 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ barotropicThicknessFlux(iEdge) = 0.0_RKIND end do !$omp end do + !$omp end parallel block => block % next end do ! block @@ -834,7 +853,9 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ nEdges = nEdgesPtr nEdges = nEdgesArray( edgeHaloComputeCounter ) - !$omp do schedule(runtime) + !$omp parallel + !$omp do schedule(runtime) & + !$omp private(iEdge, temp_mask, cell1, cell2, CoriolisTerm, i, eoe) do iEdge = 1, nEdges temp_mask = edgeMask(1, iEdge) @@ -859,6 +880,7 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ end do !$omp end do + !$omp end parallel block => block % next end do ! block @@ -916,7 +938,9 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ ! config_btr_gam1_velWt1=0.5 flux = 1/2*(normalBarotropicVelocityNew+normalBarotropicVelocityOld)*H ! config_btr_gam1_velWt1= 0 flux = normalBarotropicVelocityOld*H - !$omp do schedule(runtime) private(i, iEdge, cell1, cell2, sshEdge, thicknessSum, flux) + !$omp parallel + !$omp do schedule(runtime) & + !$omp private(iCell, i, iEdge, cell1, cell2, sshEdge, thicknessSum, flux) do iCell = 1, nCells sshTend(iCell) = 0.0_RKIND do i = 1, nEdgesOnCell(iCell) @@ -951,6 +975,7 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ + dt / nBtrSubcycles * sshTend(iCell) / areaCell(iCell) end do !$omp end do + !$omp end parallel !! asarje: changed to avoid redundant computations when config_btr_solve_SSH2 is true @@ -968,7 +993,9 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ ! otherwise, DO accumulate barotropicThicknessFlux in this SSH predictor section barotropicThicknessFlux_coeff = 1.0_RKIND - !$omp do schedule(runtime) + !$omp parallel + !$omp do schedule(runtime) & + !$omp private(iEdge, cell1, cell2, sshEdge, thicknessSum, flux) do iEdge = 1, nEdges cell1 = cellsOnEdge(1,iEdge) cell2 = cellsOnEdge(2,iEdge) @@ -985,6 +1012,7 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ barotropicThicknessFlux(iEdge) = barotropicThicknessFlux(iEdge) + flux end do !$omp end do + !$omp end parallel endif @@ -1061,15 +1089,19 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ nEdges = nEdgesArray( min(edgeHaloComputeCounter + 1, config_num_halos + 1) ) - !$omp do schedule(runtime) + !$omp parallel + !$omp do schedule(runtime) private(iEdge) do iEdge = 1, nEdges+1 btrvel_temp(iEdge) = normalBarotropicVelocitySubcycleNew(iEdge) end do !$omp end do + !$omp end parallel nEdges = nEdgesArray( edgeHaloComputeCounter ) - !$omp do schedule(runtime) + !$omp parallel + !$omp do schedule(runtime) & + !$omp private(iEdge, temp_mask, cell1, cell2, coriolisTerm, eoe, sshCell1, sshCell2) do iEdge = 1, nEdges ! asarje: added to avoid redundant computations based on mask @@ -1103,6 +1135,7 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ end do !$omp end do + !$omp end parallel block => block % next end do ! block @@ -1164,7 +1197,9 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ ! config_btr_gam3_velWt2=0.5 flux = 1/2*(normalBarotropicVelocityNew+normalBarotropicVelocityOld)*H ! config_btr_gam3_velWt2= 0 flux = normalBarotropicVelocityOld*H - !$omp do schedule(runtime) private(i, iEdge, cell1, cell2, sshCell1, sshCell2, sshEdge, thicknessSum, flux) + !$omp parallel + !$omp do schedule(runtime) & + !$omp private(i, iEdge, cell1, cell2, sshCell1, sshCell2, sshEdge, thicknessSum, flux) do iCell = 1, nCells sshTend(iCell) = 0.0_RKIND do i = 1, nEdgesOnCell(iCell) @@ -1206,7 +1241,8 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ end do !$omp end do - !$omp do schedule(runtime) private(cell1, cell2, sshCell1, sshCell2, sshEdge, thicknessSum, flux) + !$omp do schedule(runtime) & + !$omp private(cell1, cell2, sshCell1, sshCell2, sshEdge, thicknessSum, flux) do iEdge = 1, nEdges cell1 = cellsOnEdge(1,iEdge) cell2 = cellsOnEdge(2,iEdge) @@ -1233,6 +1269,7 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ barotropicThicknessFlux(iEdge) = barotropicThicknessFlux(iEdge) + flux end do !$omp end do + !$omp end parallel block => block % next end do ! block @@ -1262,12 +1299,14 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ nEdges = nEdgesPtr - !$omp do schedule(runtime) + !$omp parallel + !$omp do schedule(runtime) private(iEdge) do iEdge = 1, nEdges normalBarotropicVelocityNew(iEdge) = normalBarotropicVelocityNew(iEdge) & + normalBarotropicVelocitySubcycleNew(iEdge) end do ! iEdge !$omp end do + !$omp end parallel block => block % next end do ! block @@ -1305,7 +1344,8 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ nEdges = nEdgesArray(1) - !$omp do schedule(runtime) + !$omp parallel + !$omp do schedule(runtime) private(iEdge) do iEdge = 1, nEdges barotropicThicknessFlux(iEdge) = barotropicThicknessFlux(iEdge) & / (nBtrSubcycles * config_btr_subcycle_loop_factor) @@ -1314,6 +1354,7 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ / (nBtrSubcycles * config_btr_subcycle_loop_factor + 1) end do !$omp end do + !$omp end parallel block => block % next end do ! block @@ -1378,7 +1419,10 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ useVelocityCorrection = 0 endif - !$omp do schedule(runtime) + !$omp parallel + !$omp do schedule(runtime) & + !$omp private(iEdge, uTemp, normalThicknessFluxSum, thicknessSum, k, & + !$omp normalVelocityCorrection) do iEdge = 1, nEdges ! velocity for normalVelocityCorrectionection is normalBarotropicVelocity + normalBaroclinicVelocity + uBolus @@ -1415,6 +1459,7 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ end do ! iEdge !$omp end do + !$omp end parallel deallocate(uTemp) @@ -1586,7 +1631,8 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ ! Only need T & S for earlier iterations, ! then all the tracers needed the last time through. - !$omp do schedule(runtime) private(k, temp_h, temp, i) + !$omp parallel + !$omp do schedule(runtime) private(iCell, k, temp_h, temp, i) do iCell = 1, nCells ! sshNew is a pointer, defined above. do k = 1, maxLevelCell(iCell) @@ -1608,9 +1654,11 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ end do end do ! iCell !$omp end do + !$omp end parallel if (config_use_freq_filtered_thickness) then - !$omp do schedule(runtime) private(k, temp) + !$omp parallel + !$omp do schedule(runtime) private(iCell, k, temp) do iCell = 1, nCells do k = 1, maxLevelCell(iCell) @@ -1628,9 +1676,11 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ end do end do !$omp end do + !$omp end parallel end if - !$omp do schedule(runtime) private(k) + !$omp parallel + !$omp do schedule(runtime) private(k, iEdge) do iEdge = 1, nEdges do k = 1, nVertLevels @@ -1645,6 +1695,7 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ end do ! iEdge !$omp end do + !$omp end parallel ! Efficiency note: We really only need this to compute layerThicknessEdge, density, pressure, and SSH ! in this diagnostics solve. @@ -1657,7 +1708,8 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! elseif (split_explicit_step == config_n_ts_iter) then - !$omp do schedule(runtime) private(k) + !$omp parallel + !$omp do schedule(runtime) private(k, iCell) do iCell = 1, nCells do k = 1, maxLevelCell(iCell) ! this is h_{n+1} @@ -1665,6 +1717,7 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ end do end do !$omp end do + !$omp end parallel if (config_compute_active_tracer_budgets) then call mpas_pool_get_array(diagnosticsPool,'activeTracerHorizontalAdvectionTendency', & @@ -1679,7 +1732,8 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ activeTracerHorizontalAdvectionEdgeFlux) call mpas_pool_get_array(diagnosticsPool, 'layerThicknessEdge', layerThicknessEdge) - !$omp do schedule(runtime) private(k) + !$omp parallel + !$omp do schedule(runtime) private(k, iEdge) do iEdge = 1, nEdges do k= 1, maxLevelEdgeTop(iEdge) activeTracerHorizontalAdvectionEdgeFlux(:,k,iEdge) = & @@ -1689,7 +1743,7 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ enddo !$omp end do - !$omp do schedule(runtime) private(k) + !$omp do schedule(runtime) private(k, iCell) do iCell = 1, nCells do k= 1, maxLevelCell(iCell) activeTracerHorizontalAdvectionTendency(:,k,iCell) = & @@ -1718,6 +1772,7 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ end do end do !$omp end do + !$omp end parallel endif call mpas_pool_begin_iteration(tracersPool) @@ -1733,7 +1788,8 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ modifiedGroupName = trim(groupItr % memberName) // 'Tend' call mpas_pool_get_array(tracersTendPool, modifiedGroupName, tracersGroupTend) - !$omp do schedule(runtime) private(k) + !$omp parallel + !$omp do schedule(runtime) private(k, iCell) do iCell = 1, nCells do k = 1, maxLevelCell(iCell) tracersGroupNew(:,k,iCell) = (tracersGroupCur(:,k,iCell) * layerThicknessCur(k,iCell) + dt & @@ -1741,16 +1797,19 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ end do end do !$omp end do + !$omp end parallel ! limit salinity in separate loop if ( trim(groupItr % memberName) == 'activeTracers' ) then - !$omp do schedule(runtime) private(k) + !$omp parallel + !$omp do schedule(runtime) private(k, iCell) do iCell = 1, nCells do k = 1, maxLevelCell(iCell) tracersGroupNew(indexSalinity,k,iCell) = max(0.001_RKIND, tracersGroupNew(indexSalinity,k,iCell)) end do end do !$omp end do + !$omp end parallel end if ! Reset debugTracers to fixed value at the surface @@ -1758,7 +1817,8 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ call mpas_pool_get_array(meshPool, 'latCell', latCell) call mpas_pool_get_array(meshPool, 'lonCell', lonCell) if (config_reset_debugTracers_near_surface) then - !$omp do schedule(runtime) + !$omp parallel + !$omp do schedule(runtime) private(iCell, k, lat) do iCell = 1, nCells ! Reset tracer1 to 2 in top n layers @@ -1802,6 +1862,7 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ end if end do !$omp end do + !$omp end parallel end if end if @@ -1810,7 +1871,8 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ end do if (config_use_freq_filtered_thickness) then - !$omp do schedule(runtime) private(k) + !$omp parallel + !$omp do schedule(runtime) private(k, iCell) do iCell = 1, nCells do k = 1, maxLevelCell(iCell) @@ -1821,6 +1883,7 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ end do end do !$omp end do + !$omp end parallel end if ! Recompute final u to go on to next step. @@ -1832,7 +1895,8 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ ! note that normalBaroclinicVelocity is recomputed at the beginning of the next timestep due to Imp Vert mixing, ! so normalBaroclinicVelocity does not have to be recomputed here. - !$omp do schedule(runtime) private(k) + !$omp parallel + !$omp do schedule(runtime) private(k, iEdge) do iEdge = 1, nEdges do k = 1, maxLevelEdgeTop(iEdge) normalVelocityNew(k,iEdge) = normalBarotropicVelocityNew(iEdge) + 2 * normalBaroclinicVelocityNew(k,iEdge) & @@ -1840,6 +1904,7 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ end do end do ! iEdges !$omp end do + !$omp end parallel endif ! split_explicit_step @@ -1966,19 +2031,23 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ nEdges = nEdgesPtr if (config_prescribe_velocity) then - !$omp do schedule(runtime) + !$omp parallel + !$omp do schedule(runtime) private(iEdge) do iEdge = 1, nEdges normalVelocityNew(:, iEdge) = normalVelocityCur(:, iEdge) end do !$omp end do + !$omp end parallel end if if (config_prescribe_thickness) then - !$omp do schedule(runtime) + !$omp parallel + !$omp do schedule(runtime) private(iCell) do iCell = 1, nCells layerThicknessNew(:, iCell) = layerThicknessCur(:, iCell) end do !$omp end do + !$omp end parallel end if call ocn_diagnostic_solve(dt, statePool, forcingPool, meshPool, diagnosticsPool, scratchPool, tracersPool, 2) @@ -2006,7 +2075,8 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ call mpas_timer_stop('se final mpas reconstruct') - !$omp do schedule(runtime) + !$omp parallel + !$omp do schedule(runtime) private(iCell) do iCell = 1, nCells surfaceVelocity(indexSurfaceVelocityZonal, iCell) = velocityZonal(1, iCell) surfaceVelocity(indexSurfaceVelocityMeridional, iCell) = velocityMeridional(1, iCell) @@ -2015,6 +2085,7 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ SSHGradient(indexSSHGradientMeridional, iCell) = gradSSHMeridional(iCell) end do !$omp end do + !$omp end parallel call ocn_time_average_coupled_accumulate(diagnosticsPool, statePool, forcingPool, 2) From 3181d8aeb2a273d7acb1805b1ff09736222d57e8 Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Tue, 31 Mar 2020 12:31:37 -0700 Subject: [PATCH 02/16] Localized OpenMP threading in shared/ Move MPAS-O OpenMP thread parallel regions to be local to the loops in shared/ instead of instantiating the parallel region around the timestep loop. --- src/core_ocean/shared/mpas_ocn_diagnostics.F | 90 +++++++++++++++---- .../shared/mpas_ocn_diagnostics_routines.F | 2 + .../mpas_ocn_effective_density_in_land_ice.F | 1 + .../shared/mpas_ocn_equation_of_state_jm.F | 43 +++++---- .../mpas_ocn_equation_of_state_linear.F | 8 ++ .../shared/mpas_ocn_frazil_forcing.F | 15 +++- .../mpas_ocn_high_freq_thickness_hmix_del2.F | 2 + src/core_ocean/shared/mpas_ocn_sea_ice.F | 6 +- .../shared/mpas_ocn_surface_bulk_forcing.F | 10 +++ .../shared/mpas_ocn_surface_land_ice_fluxes.F | 19 +++- src/core_ocean/shared/mpas_ocn_tendency.F | 34 +++++-- src/core_ocean/shared/mpas_ocn_test.F | 2 + src/core_ocean/shared/mpas_ocn_thick_ale.F | 12 ++- src/core_ocean/shared/mpas_ocn_thick_hadv.F | 2 + .../shared/mpas_ocn_thick_surface_flux.F | 2 + src/core_ocean/shared/mpas_ocn_thick_vadv.F | 2 + .../shared/mpas_ocn_tidal_forcing.F | 2 + .../shared/mpas_ocn_time_average_coupled.F | 30 ++++--- .../shared/mpas_ocn_time_varying_forcing.F | 2 + src/core_ocean/shared/mpas_ocn_tracer_TTD.F | 2 + .../shared/mpas_ocn_tracer_advection_mono.F | 87 +++++++++++++----- .../shared/mpas_ocn_tracer_advection_std.F | 2 + .../mpas_ocn_tracer_exponential_decay.F | 2 + .../shared/mpas_ocn_tracer_hmix_del2.F | 6 +- .../shared/mpas_ocn_tracer_hmix_del4.F | 4 + .../shared/mpas_ocn_tracer_ideal_age.F | 2 + .../mpas_ocn_tracer_interior_restoring.F | 2 + .../shared/mpas_ocn_tracer_nonlocalflux.F | 2 + ..._ocn_tracer_short_wave_absorption_jerlov.F | 4 +- ...cn_tracer_short_wave_absorption_variable.F | 4 +- .../mpas_ocn_tracer_surface_flux_to_tend.F | 4 + .../mpas_ocn_tracer_surface_restoring.F | 2 + ...pas_ocn_vel_forcing_explicit_bottom_drag.F | 2 + .../mpas_ocn_vel_forcing_surface_stress.F | 6 +- .../shared/mpas_ocn_vel_hadv_coriolis.F | 5 +- .../shared/mpas_ocn_vel_hmix_del2.F | 2 + .../shared/mpas_ocn_vel_hmix_del4.F | 18 +++- .../shared/mpas_ocn_vel_hmix_leith.F | 2 + .../shared/mpas_ocn_vel_pressure_grad.F | 18 +++- src/core_ocean/shared/mpas_ocn_vel_vadv.F | 6 +- src/core_ocean/shared/mpas_ocn_vmix.F | 23 ++++- .../shared/mpas_ocn_vmix_coefs_redi.F | 2 + src/core_ocean/shared/mpas_ocn_vmix_cvmix.F | 20 ++++- .../shared/mpas_ocn_wetting_drying.F | 13 +-- 44 files changed, 415 insertions(+), 109 deletions(-) diff --git a/src/core_ocean/shared/mpas_ocn_diagnostics.F b/src/core_ocean/shared/mpas_ocn_diagnostics.F index 26a1216628..6abf089edb 100644 --- a/src/core_ocean/shared/mpas_ocn_diagnostics.F +++ b/src/core_ocean/shared/mpas_ocn_diagnostics.F @@ -315,7 +315,8 @@ subroutine ocn_diagnostic_solve(dt, statePool, forcingPool, meshPool, diagnostic nVertices = nVerticesArray( size(nVerticesArray) ) if (.not. config_use_wetting_drying .or. (config_use_wetting_drying .and. (trim(config_thickness_flux_type) .eq. 'centered'))) then - !$omp do schedule(runtime) private(cell1, cell2, k) + !$omp parallel + !$omp do schedule(runtime) private(cell1, cell2, k, iEdge) do iEdge = 1, nEdges ! initialize layerThicknessEdge to avoid divide by zero and NaN problems. layerThicknessEdge(:, iEdge) = -1.0e34_RKIND @@ -327,10 +328,12 @@ subroutine ocn_diagnostic_solve(dt, statePool, forcingPool, meshPool, diagnostic end do end do !$omp end do + !$omp end parallel else if (config_use_wetting_drying .and. (trim(config_thickness_flux_type) .ne. 'centered')) then if ( trim(config_thickness_flux_type) .eq. 'upwind') then - !$omp do schedule(runtime) private(cell1, cell2, k) + !$omp parallel + !$omp do schedule(runtime) private(cell1, cell2, k, iEdge) do iEdge = 1, nEdges ! initialize layerThicknessEdge to avoid divide by zero and NaN problems. layerThicknessEdge(:, iEdge) = -1.0e34_RKIND @@ -348,6 +351,7 @@ subroutine ocn_diagnostic_solve(dt, statePool, forcingPool, meshPool, diagnostic end do end do !$omp end do + !$omp end parallel end if else call mpas_log_write('Thickness flux option of ' // trim(config_thickness_flux_type) // 'is not known', MPAS_LOG_CRIT) @@ -361,20 +365,18 @@ subroutine ocn_diagnostic_solve(dt, statePool, forcingPool, meshPool, diagnostic ! used -1e34 so error clearly occurs if these values are used. ! - !$omp master normalVelocity(:,nEdges+1) = -1e34 layerThickness(:,nCells+1) = -1e34 activeTracers(indexTemperature,:,nCells+1) = -1e34 activeTracers(indexSalinity,:,nCells+1) = -1e34 - !$omp end master - call mpas_threading_barrier() call ocn_relativeVorticity_circulation(relativeVorticity, circulation, meshPool, normalVelocity, err) ! Need owned cells for relativeVorticityCell nCells = nCellsArray( 1 ) - !$omp do schedule(runtime) private(invAreaCell1, i, j, k, iVertex) + !$omp parallel + !$omp do schedule(runtime) private(invAreaCell1, i, j, k, iVertex, iCell) do iCell = 1, nCells relativeVorticityCell(:,iCell) = 0.0_RKIND invAreaCell1 = 1.0_RKIND / areaCell(iCell) @@ -389,6 +391,7 @@ subroutine ocn_diagnostic_solve(dt, statePool, forcingPool, meshPool, diagnostic end do end do !$omp end do + !$omp end parallel ! Need 0,1,2 halo cells nCells = nCellsArray( 3 ) @@ -397,7 +400,10 @@ subroutine ocn_diagnostic_solve(dt, statePool, forcingPool, meshPool, diagnostic ! allocate(div_hu(nVertLevels),div_huTransport(nVertLevels),div_huGMBolus(nVertLevels)) - !$omp do schedule(runtime) private(invAreaCell1, i, k, iEdge, edgeSignOnCell_temp, dcEdge_temp, dvEdge_temp, r_tmp) + !$omp parallel + !$omp do schedule(runtime) & + !$omp private(i, iEdge, invAreaCell1, edgeSignOnCell_temp, dcEdge_temp, dvEdge_temp, & + !$omp k, r_tmp, div_hu, div_huTransport, div_huGMBolus) do iCell = 1, nCells divergence(:, iCell) = 0.0_RKIND kineticEnergyCell(:, iCell) = 0.0_RKIND @@ -436,12 +442,14 @@ subroutine ocn_diagnostic_solve(dt, statePool, forcingPool, meshPool, diagnostic end do end do !$omp end do + !$omp end parallel deallocate(div_hu,div_huTransport,div_huGMBolus) nEdges = nEdgesArray( 2 ) - !$omp do schedule(runtime) private(i, eoe, weightsOnEdge_temp, k) + !$omp parallel + !$omp do schedule(runtime) private(iEdge, i, eoe, weightsOnEdge_temp, k) do iEdge = 1, nEdges tangentialVelocity(:, iEdge) = 0.0_RKIND ! Compute v (tangential) velocities @@ -455,6 +463,7 @@ subroutine ocn_diagnostic_solve(dt, statePool, forcingPool, meshPool, diagnostic end do end do !$omp end do + !$omp end parallel ! Need 0,1,2 halo cells nCells = nCellsArray( 3 ) @@ -469,6 +478,7 @@ subroutine ocn_diagnostic_solve(dt, statePool, forcingPool, meshPool, diagnostic !$omp end master !$omp barrier + !$omp parallel !$omp do schedule(runtime) private(i, iEdge, r_tmp, k) do iVertex = 1, nVertices kineticEnergyVertex(:, iVertex) = 0.0_RKIND @@ -508,6 +518,7 @@ subroutine ocn_diagnostic_solve(dt, statePool, forcingPool, meshPool, diagnostic end do end do !$omp end do + !$omp end parallel !$omp barrier !$omp master @@ -527,6 +538,7 @@ subroutine ocn_diagnostic_solve(dt, statePool, forcingPool, meshPool, diagnostic nVertices = nVerticesArray( 3 ) + !$omp parallel !$omp do schedule(runtime) private(invAreaTri1, k, layerThicknessVertex, i) do iVertex = 1, nVertices invAreaTri1 = 1.0_RKIND / areaTriangle(iVertex) @@ -543,9 +555,11 @@ subroutine ocn_diagnostic_solve(dt, statePool, forcingPool, meshPool, diagnostic end do end do !$omp end do + !$omp end parallel nEdges = nEdgesArray( 3 ) + !$omp parallel !$omp do schedule(runtime) private(vertex1, vertex2, k) do iEdge = 1, nEdges normalizedRelativeVorticityEdge(:, iEdge) = 0.0_RKIND @@ -560,9 +574,11 @@ subroutine ocn_diagnostic_solve(dt, statePool, forcingPool, meshPool, diagnostic end do end do !$omp end do + !$omp end parallel nCells = nCellsArray( 2 ) + !$omp parallel !$omp do schedule(runtime) private(invAreaCell1, i, j, iVertex, k) do iCell = 1, nCells normalizedRelativeVorticityCell(:, iCell) = 0.0_RKIND @@ -578,6 +594,7 @@ subroutine ocn_diagnostic_solve(dt, statePool, forcingPool, meshPool, diagnostic end do end do !$omp end do + !$omp end parallel nCells = nCellsArray( size(nCellsArray) ) nVertices = nVerticesArray( size(nVerticesArray) ) @@ -594,6 +611,7 @@ subroutine ocn_diagnostic_solve(dt, statePool, forcingPool, meshPool, diagnostic nEdges = nEdgesArray( 2 ) + !$omp parallel !$omp do schedule(runtime) private(cell1, cell2, vertex1, vertex2, invLength, k) do iEdge = 1,nEdges cell1 = cellsOnEdge(1, iEdge) @@ -633,6 +651,7 @@ subroutine ocn_diagnostic_solve(dt, statePool, forcingPool, meshPool, diagnostic enddo enddo !$omp end do + !$omp end parallel !$omp barrier !$omp master @@ -687,7 +706,8 @@ subroutine ocn_diagnostic_solve(dt, statePool, forcingPool, meshPool, diagnostic allocate(pTop(nVertLevels)) - !$omp do schedule(runtime) + !$omp parallel + !$omp do schedule(runtime) private(iCell, pTop, k) do iCell = 1, nCells ! assume atmospheric pressure at the surface is zero for now. @@ -707,11 +727,13 @@ subroutine ocn_diagnostic_solve(dt, statePool, forcingPool, meshPool, diagnostic end do !$omp end do + !$omp end parallel deallocate(pTop) else + !$omp parallel !$omp do schedule(runtime) private(k) do iCell = 1, nCells ! Pressure for generalized coordinates. @@ -749,6 +771,7 @@ subroutine ocn_diagnostic_solve(dt, statePool, forcingPool, meshPool, diagnostic end do !$omp end do + !$omp end parallel endif @@ -758,6 +781,7 @@ subroutine ocn_diagnostic_solve(dt, statePool, forcingPool, meshPool, diagnostic ! Brunt-Vaisala frequency (this has units of s^{-2}) ! coef = -gravity / rho_sw + !$omp parallel !$omp do schedule(runtime) private(k) do iCell = 1, nCells BruntVaisalaFreqTop(1,iCell) = 0.0_RKIND @@ -810,6 +834,7 @@ subroutine ocn_diagnostic_solve(dt, statePool, forcingPool, meshPool, diagnostic normalVelocitySurfaceLayer(iEdge) = normalVelocity(1, iEdge) end do !$omp end do + !$omp end parallel ! ! average tracer values over the ocean surface layer @@ -818,7 +843,8 @@ subroutine ocn_diagnostic_solve(dt, statePool, forcingPool, meshPool, diagnostic nCells = nCellsArray( 1 ) - !$omp do schedule(runtime) + !$omp parallel + !$omp do schedule(runtime) private(iCell, surfaceLayerDepth, sumSurfaceLayer, k, rSurfaceLayer) do iCell=1,nCells surfaceLayerDepth = config_cvmix_kpp_surface_layer_averaging sumSurfaceLayer=0.0_RKIND @@ -844,6 +870,7 @@ subroutine ocn_diagnostic_solve(dt, statePool, forcingPool, meshPool, diagnostic * activeTracers(:,k,iCell) * layerThickness(k,iCell)) / surfaceLayerDepth enddo !$omp end do + !$omp end parallel nEdges = nEdgesArray( 1 ) @@ -852,6 +879,7 @@ subroutine ocn_diagnostic_solve(dt, statePool, forcingPool, meshPool, diagnostic ! the ocean surface layer is generally assumed to be about 0.1 of the boundary layer depth ! + !$omp parallel !$omp do schedule(runtime) private(cell1, cell2, surfaceLayerDepth, sumSurfaceLayer, k, rSurfaceLayer) do iEdge=1,nEdges normalVelocitySurfaceLayer(iEdge) = 0.0_RKIND @@ -881,18 +909,21 @@ subroutine ocn_diagnostic_solve(dt, statePool, forcingPool, meshPool, diagnostic end if enddo !$omp end do + !$omp end parallel endif nCells = nCellsArray( 2 ) ! compute the attenuation coefficient for surface fluxes + !$omp parallel !$omp do schedule(runtime) do iCell = 1, nCells surfaceFluxAttenuationCoefficient(iCell) = config_flux_attenuation_coefficient surfaceFluxAttenuationCoefficientRunoff(iCell) = config_flux_attenuation_coefficient_runoff end do !$omp end do + !$omp end parallel ! ! compute fields needed to compute land-ice fluxes, either in the ocean model or in the coupler @@ -900,13 +931,15 @@ subroutine ocn_diagnostic_solve(dt, statePool, forcingPool, meshPool, diagnostic diagnosticsPool, timeLevel) nCells = nCellsArray( 2 ) + !$omp parallel !$omp do schedule(runtime) do iCell = 1, nCells pressureAdjustedSSH(iCell) = ssh(iCell) + ( seaIcePressure(iCell) / ( gravity * rho_sw ) ) end do - !$omp end do + !$omp end parallel if(associated(landIceDraft)) then + !$omp parallel !$omp do schedule(runtime) do iCell = 1, nCells ! subtract the land ice draft from the SSH so sea ice doesn't experience tilt @@ -914,10 +947,12 @@ subroutine ocn_diagnostic_solve(dt, statePool, forcingPool, meshPool, diagnostic pressureAdjustedSSH(iCell) = pressureAdjustedSSH(iCell) - landIceDraft(iCell) end do !$omp end do + !$omp end parallel end if nEdges = nEdgesArray( 2 ) + !$omp parallel !$omp do schedule(runtime) private(cell1, cell2) do iEdge = 1, nEdges cell1 = cellsOnEdge(1, iEdge) @@ -926,6 +961,7 @@ subroutine ocn_diagnostic_solve(dt, statePool, forcingPool, meshPool, diagnostic gradSSH(iEdge) = edgeMask(1, iEdge) * ( pressureAdjustedSSH(cell2) - pressureAdjustedSSH(cell1) ) / dcEdge(iEdge) end do !$omp end do + !$omp end parallel call mpas_timer_stop('diagnostic solve') @@ -1039,6 +1075,7 @@ subroutine ocn_vert_transport_velocity_top(meshPool, verticalMeshPool, scratchPo ! thickness-weighted divergence and barotropic divergence ! ! See Ringler et al. (2010) jcp paper, eqn 19, 21, and fig. 3. + !$omp parallel !$omp do schedule(runtime) private(invAreaCell, i, iEdge, k, flux, div_hu_btr) do iCell = 1, nCells div_hu(:,iCell) = 0.0_RKIND @@ -1057,6 +1094,7 @@ subroutine ocn_vert_transport_velocity_top(meshPool, verticalMeshPool, scratchPo projectedSSH(iCell) = oldSSH(iCell) - dt*div_hu_btr end do !$omp end do + !$omp end parallel ! ! Compute desired thickness at new time @@ -1076,6 +1114,7 @@ subroutine ocn_vert_transport_velocity_top(meshPool, verticalMeshPool, scratchPo ! Here we are using solving the continuity equation for vertAleTransportTop ($w^t$), ! and using ALE_Thickness for thickness at the new time. + !$omp parallel !$omp do schedule(runtime) private(k) do iCell = 1,nCells vertAleTransportTop(1,iCell) = 0.0_RKIND @@ -1086,6 +1125,7 @@ subroutine ocn_vert_transport_velocity_top(meshPool, verticalMeshPool, scratchPo end do end do !$omp end do + !$omp end parallel !$omp barrier !$omp master @@ -1152,6 +1192,7 @@ subroutine ocn_fuperp(statePool, meshPool, timeLevelIn)!{{{ ! ! Put f*normalBaroclinicVelocity^{perp} in u as a work variable ! + !$omp parallel !$omp do schedule(runtime) private(cell1, cell2, k, eoe) do iEdge = 1, nEdgesSolve cell1 = cellsOnEdge(1,iEdge) @@ -1168,6 +1209,7 @@ subroutine ocn_fuperp(statePool, meshPool, timeLevelIn)!{{{ end do end do !$omp end do + !$omp end parallel call mpas_timer_stop("ocn_fuperp") @@ -1215,7 +1257,8 @@ subroutine ocn_filter_btr_mode_vel(statePool, diagnosticsPool, meshPool, timeLev call mpas_pool_get_dimension(meshPool, 'nEdges', nEdges) - !$omp do schedule(runtime) + !$omp parallel + !$omp do schedule(runtime) private(iEdge, normalThicknessFluxSum, thicknessSum, k, vertSum) do iEdge = 1, nEdges ! thicknessSum is initialized outside the loop because on land boundaries @@ -1235,6 +1278,7 @@ subroutine ocn_filter_btr_mode_vel(statePool, diagnosticsPool, meshPool, timeLev enddo enddo ! iEdge !$omp end do + !$omp end parallel call mpas_timer_stop("ocn_filter_btr_mode_vel") @@ -1284,7 +1328,8 @@ subroutine ocn_filter_btr_mode_tend_vel(tendPool, statePool, diagnosticsPool, me call mpas_pool_get_dimension(meshPool, 'nEdges', nEdges) - !$omp do schedule(runtime) + !$omp parallel + !$omp do schedule(runtime) private(iEdge, normalThicknessFluxSum, thicknessSum, k, vertSum) do iEdge = 1, nEdges ! thicknessSum is initialized outside the loop because on land boundaries @@ -1304,6 +1349,7 @@ subroutine ocn_filter_btr_mode_tend_vel(tendPool, statePool, diagnosticsPool, me enddo enddo ! iEdge !$omp end do + !$omp end parallel call mpas_timer_stop("ocn_filter_btr_mode_tend_vel") @@ -1491,7 +1537,10 @@ subroutine ocn_compute_KPP_input_fields(statePool, forcingPool, meshPool, diagno densitySurfaceDisplaced, err, thermalExpansionCoeff, salineContractionCoeff, & timeLevel) - !$omp do schedule(runtime) + !$omp parallel + !$omp do schedule(runtime) & + !$omp private(iCell, invAreaCell, fracAbsorbed, fracAbsorbedRunoff, deltaVelocitySquared, i, & + !$omp iEdge, factor, delU2) do iCell = 1, nCells ! compute surface buoyancy forcing based on surface fluxes of mass, temperature, salinity and frazil ! (frazil to be added later) @@ -1549,6 +1598,7 @@ subroutine ocn_compute_KPP_input_fields(statePool, forcingPool, meshPool, diagno enddo !$omp end do + !$omp end parallel ! deallocate scratch space !$omp barrier @@ -1704,6 +1754,7 @@ subroutine ocn_compute_land_ice_flux_input_fields(meshPool, statePool, & end if ! Compute top drag + !$omp parallel !$omp do schedule(runtime) private(cell1, cell2, velocityMagnitude, landIceEdgeFraction) do iEdge = 1, nEdges cell1 = cellsOnEdge(1, iEdge) @@ -1733,10 +1784,8 @@ subroutine ocn_compute_land_ice_flux_input_fields(meshPool, statePool, & end do !$omp end do - - ! average temperature and salinity over horizontal neighbors and the sub-ice-shelf boundary layer - !$omp do schedule(runtime) + !$omp do schedule(runtime) private(iCell, blThickness, iLevel, dz) do iCell = 1, nCells blThickness = 0.0_RKIND blTempScratch(iCell) = 0.0_RKIND @@ -1774,8 +1823,10 @@ subroutine ocn_compute_land_ice_flux_input_fields(meshPool, statePool, & end if end do !$omp end do + !$omp end parallel if(jenkinsOn) then + !$omp parallel !$omp do schedule(runtime) do iCell = 1, nCells ! transfer coefficients from namelist @@ -1785,7 +1836,9 @@ subroutine ocn_compute_land_ice_flux_input_fields(meshPool, statePool, & * config_land_ice_flux_jenkins_salt_transfer_coefficient end do !$omp end do + !$omp end parallel else if(hollandJenkinsOn) then + !$omp parallel !$omp do schedule(runtime) private(h_nu, Gamma_turb) do iCell = 1, nCells ! friction-velocity dependent non-dimensional transfer coefficients from @@ -1804,6 +1857,7 @@ subroutine ocn_compute_land_ice_flux_input_fields(meshPool, statePool, & * Sc**(2.0_RKIND/3.0_RKIND) - 6.0_RKIND) end do !$omp end do + !$omp end parallel end if !$omp barrier @@ -1813,6 +1867,7 @@ subroutine ocn_compute_land_ice_flux_input_fields(meshPool, statePool, & !$omp end master ! modify the spatially-varying attenuation coefficient where there is land ice + !$omp parallel !$omp do schedule(runtime) do iCell = 1, nCells if(landIceMask(iCell) == 1) then @@ -1820,6 +1875,7 @@ subroutine ocn_compute_land_ice_flux_input_fields(meshPool, statePool, & end if end do !$omp end do + !$omp end parallel call mpas_timer_stop("land_ice_diagnostic_fields") diff --git a/src/core_ocean/shared/mpas_ocn_diagnostics_routines.F b/src/core_ocean/shared/mpas_ocn_diagnostics_routines.F index bbf502c935..c03e0d686b 100644 --- a/src/core_ocean/shared/mpas_ocn_diagnostics_routines.F +++ b/src/core_ocean/shared/mpas_ocn_diagnostics_routines.F @@ -120,6 +120,7 @@ subroutine ocn_relativeVorticity_circulation(relativeVorticity, circulation, mes err = 0 + !$omp parallel !$omp do schedule(runtime) private(invAreaTri1, i, iEdge, k, r_tmp) do iVertex = 1, nVertices circulation(:, iVertex) = 0.0_RKIND @@ -136,6 +137,7 @@ subroutine ocn_relativeVorticity_circulation(relativeVorticity, circulation, mes end do end do !$omp end do + !$omp end parallel !-------------------------------------------------------------------- diff --git a/src/core_ocean/shared/mpas_ocn_effective_density_in_land_ice.F b/src/core_ocean/shared/mpas_ocn_effective_density_in_land_ice.F index bd212fe1b3..46ed50b989 100644 --- a/src/core_ocean/shared/mpas_ocn_effective_density_in_land_ice.F +++ b/src/core_ocean/shared/mpas_ocn_effective_density_in_land_ice.F @@ -154,6 +154,7 @@ subroutine ocn_effective_density_in_land_ice_update(meshPool, forcingPool, state !$omp end master !$omp barrier + !$omp parallel !$omp do schedule(runtime) do iCell = 1, nCells ! TODO: should only apply to floating land ice, once wetting/drying is supported diff --git a/src/core_ocean/shared/mpas_ocn_equation_of_state_jm.F b/src/core_ocean/shared/mpas_ocn_equation_of_state_jm.F index 08ddd62169..9006e294f1 100644 --- a/src/core_ocean/shared/mpas_ocn_equation_of_state_jm.F +++ b/src/core_ocean/shared/mpas_ocn_equation_of_state_jm.F @@ -299,15 +299,13 @@ subroutine ocn_equation_of_state_jm_density_only(nVertLevels, & !*** compute modified T,S to account for displacement and !*** valid range - !$omp master allocate(tracerTemp(nVertLevels,nCells)) allocate(tracerSalt(nVertLevels,nCells)) - !$omp end master - !$omp barrier if (displacementType == 'surfaceDisplaced') then - !$omp do schedule(runtime) + !$omp parallel + !$omp do schedule(runtime) private(iCell, k, tq, sq) do iCell=1,nCells do k=1,nVertLevels tq = min(tracersSurfaceLayerValue(indexT,iCell), & @@ -319,10 +317,12 @@ subroutine ocn_equation_of_state_jm_density_only(nVertLevels, & enddo end do !$omp end do + !$omp end parallel else - !$omp do schedule(runtime) + !$omp parallel + !$omp do schedule(runtime) private(iCell, k, tq, sq) do iCell=1,nCells do k = 1, nVertLevels tq = min(tracers(indexT,k,iCell), ocnEqStateTmax) @@ -332,16 +332,18 @@ subroutine ocn_equation_of_state_jm_density_only(nVertLevels, & end do end do !$omp end do + !$omp end parallel endif #ifdef MPAS_OPENACC - !$omp master !$acc enter data copyin(density, p, p2, tracerTemp, tracerSalt) !$acc parallel loop gang vector collapse(2) & !$acc& present(density, p, p2, tracerTemp, tracerSalt) #else - !$omp do schedule(runtime) + !$omp parallel + !$omp do schedule(runtime) & + !$omp private(iCell, k, sq, tq, sqr, t2, work1, work2, rhosfc, work3, work4, bulkMod) #endif do iCell=1,nCells do k=1,nVertLevels @@ -392,17 +394,14 @@ subroutine ocn_equation_of_state_jm_density_only(nVertLevels, & #ifdef MPAS_OPENACC !$acc update host(density) !$acc exit data delete(density, p, p2, tracerTemp, tracerSalt) - !$omp end master - !$omp barrier #else !$omp end do + !$omp end parallel #endif deallocate(p,p2) - !$omp master deallocate(tracerTemp) deallocate(tracerSalt) - !$omp end master !-------------------------------------------------------------------- @@ -571,15 +570,13 @@ subroutine ocn_equation_of_state_jm_density_exp(nVertLevels, & !*** compute modified T,S to account for displacement and !*** valid range - !$omp master allocate(tracerTemp(nVertLevels,nCells)) allocate(tracerSalt(nVertLevels,nCells)) - !$omp end master - !$omp barrier if (displacementType == 'surfaceDisplaced') then - !$omp do schedule(runtime) + !$omp parallel + !$omp do schedule(runtime) private(iCell, k, tq, sq) do iCell=1,nCells do k=1,nVertLevels tq = min(tracersSurfaceLayerValue(indexT,iCell), & @@ -591,10 +588,12 @@ subroutine ocn_equation_of_state_jm_density_exp(nVertLevels, & enddo end do !$omp end do + !$omp end parallel else - !$omp do schedule(runtime) + !$omp parallel + !$omp do schedule(runtime) private(iCell, k, tq, sq) do iCell=1,nCells do k = 1, nVertLevels tq = min(tracers(indexT,k,iCell), ocnEqStateTmax) @@ -604,11 +603,11 @@ subroutine ocn_equation_of_state_jm_density_exp(nVertLevels, & end do end do !$omp end do + !$omp end parallel endif #ifdef MPAS_OPENACC - !$omp master !$acc enter data copyin(density, p, p2, tracerTemp, tracerSalt, & !$acc& thermalExpansionCoeff, & !$acc& salineContractionCoeff) @@ -617,7 +616,10 @@ subroutine ocn_equation_of_state_jm_density_exp(nVertLevels, & !$acc& thermalExpansionCoeff, & !$acc& salineContractionCoeff) #else - !$omp do schedule(runtime) + !$omp parallel + !$omp do schedule(runtime) & + !$omp private(iCell, k, sq, tq, sqr, t2, work1, work2, rhosfc, work3, work4, bulkMod, & + !$omp denomk, drdt0, dkdt, drhodt, drds0, dkds, drhods) #endif do iCell=1,nCells do k=1,nVertLevels @@ -717,17 +719,14 @@ subroutine ocn_equation_of_state_jm_density_exp(nVertLevels, & !$acc exit data delete(density, p, p2, tracerTemp, tracerSalt, & !$acc& thermalExpansionCoeff, & !$acc& salineContractionCoeff) - !$omp end master - !$omp barrier #else !$omp end do + !$omp end parallel #endif deallocate(p,p2) - !$omp master deallocate(tracerTemp) deallocate(tracerSalt) - !$omp end master !-------------------------------------------------------------------- diff --git a/src/core_ocean/shared/mpas_ocn_equation_of_state_linear.F b/src/core_ocean/shared/mpas_ocn_equation_of_state_linear.F index 6bf8679803..d320a61882 100644 --- a/src/core_ocean/shared/mpas_ocn_equation_of_state_linear.F +++ b/src/core_ocean/shared/mpas_ocn_equation_of_state_linear.F @@ -172,6 +172,7 @@ subroutine ocn_equation_of_state_linear_density_only( & if (trim(displacementType) == 'surfaceDisplaced') then + !$omp parallel !$omp do schedule(runtime) private(k) do iCell = 1, nCells do k = 1, nVertLevels @@ -186,9 +187,11 @@ subroutine ocn_equation_of_state_linear_density_only( & end do end do !$omp end do + !$omp end parallel else ! all other displacement types + !$omp parallel !$omp do schedule(runtime) private(k) do iCell = 1, nCells do k = 1, nVertLevels @@ -203,6 +206,7 @@ subroutine ocn_equation_of_state_linear_density_only( & end do end do !$omp end do + !$omp end parallel endif @@ -306,6 +310,7 @@ subroutine ocn_equation_of_state_linear_density_exp( & if (trim(displacementType) == 'surfaceDisplaced') then + !$omp parallel !$omp do schedule(runtime) private(k) do iCell = 1, nCells do k = 1, nVertLevels @@ -326,9 +331,11 @@ subroutine ocn_equation_of_state_linear_density_exp( & end do end do !$omp end do + !$omp end parallel else ! all other displacement types + !$omp parallel !$omp do schedule(runtime) private(k) do iCell = 1, nCells do k = 1, nVertLevels @@ -349,6 +356,7 @@ subroutine ocn_equation_of_state_linear_density_exp( & end do end do !$omp end do + !$omp end parallel endif diff --git a/src/core_ocean/shared/mpas_ocn_frazil_forcing.F b/src/core_ocean/shared/mpas_ocn_frazil_forcing.F index c5ecf88f9f..58d7f56e53 100644 --- a/src/core_ocean/shared/mpas_ocn_frazil_forcing.F +++ b/src/core_ocean/shared/mpas_ocn_frazil_forcing.F @@ -183,6 +183,7 @@ subroutine ocn_frazil_forcing_layer_thickness(meshPool, forcingPool, layerThickn nCells = nCellsArray( 2 ) ! Build surface fluxes at cell centers + !$omp parallel !$omp do schedule(runtime) private(k) do iCell = 1, nCells do k = 1, maxLevelCell(iCell) @@ -190,6 +191,7 @@ subroutine ocn_frazil_forcing_layer_thickness(meshPool, forcingPool, layerThickn end do end do !$omp end do + !$omp end parallel call mpas_timer_stop("frazil thickness tendency") @@ -266,6 +268,7 @@ subroutine ocn_frazil_forcing_active_tracers(meshPool, tracersPool, forcingPool, nCells = nCellsArray( 2 ) ! add to surface fluxes at cell centers + !$omp parallel !$omp do schedule(runtime) private(k) do iCell = 1, nCells do k = 1, maxLevelCell(iCell) @@ -275,6 +278,7 @@ subroutine ocn_frazil_forcing_active_tracers(meshPool, tracersPool, forcingPool, end do end do !$omp end do + !$omp end parallel end subroutine ocn_frazil_forcing_active_tracers!}}} @@ -427,7 +431,13 @@ subroutine ocn_frazil_forcing_build_arrays(domain, meshPool, forcingPool, diagno nCells = nCellsArray( 2 ) ! loop over all columns - !$omp do schedule(runtime) + !$omp parallel + !$omp do schedule(runtime) & + !$omp private(underLandIce, kBottomFrazil, columnTemperatureMin, k, sumNewFrazilIceThickness, & + !$omp sumNewThicknessWeightedSaltContent, oceanFreezingTemperature, potential, & + !$omp freezingEnergy, meltingEnergy, frazilSalinity, newFrazilIceThickness, & + !$omp newThicknessWeightedSaltContent, meltedFrazilIceThickness, & + !$omp meltedThicknessWeightedSaltContent) do iCell=1,nCells underLandIce = .false. @@ -583,13 +593,16 @@ subroutine ocn_frazil_forcing_build_arrays(domain, meshPool, forcingPool, diagno enddo ! do iCell = 1, nCells !$omp end do + !$omp end parallel if ( config_frazil_use_surface_pressure ) then + !$omp parallel !$omp do schedule(runtime) do iCell = 1, nCells frazilSurfacePressure(iCell) = accumulatedFrazilIceMassNew(iCell) * gravity end do !$omp end do + !$omp end parallel end if call mpas_timer_stop("frazil") diff --git a/src/core_ocean/shared/mpas_ocn_high_freq_thickness_hmix_del2.F b/src/core_ocean/shared/mpas_ocn_high_freq_thickness_hmix_del2.F index 3b82d0b50a..281418950b 100644 --- a/src/core_ocean/shared/mpas_ocn_high_freq_thickness_hmix_del2.F +++ b/src/core_ocean/shared/mpas_ocn_high_freq_thickness_hmix_del2.F @@ -136,6 +136,7 @@ subroutine ocn_high_freq_thickness_hmix_del2_tend(meshPool, highFreqThickness, t call mpas_pool_get_array(meshPool, 'edgesOnCell', edgesOnCell) call mpas_pool_get_array(meshPool, 'edgeSignOnCell', edgeSignOnCell) + !$omp parallel !$omp do schedule(runtime) private(invAreaCell, i, iEdge, cell1, cell2, r_tmp, k, hhf_turb_flux, flux) do iCell = 1, nCells invAreaCell = 1.0_RKIND / areaCell(iCell) @@ -159,6 +160,7 @@ subroutine ocn_high_freq_thickness_hmix_del2_tend(meshPool, highFreqThickness, t end do end do !$omp end do + !$omp end parallel call mpas_timer_stop("thick hmix del2") diff --git a/src/core_ocean/shared/mpas_ocn_sea_ice.F b/src/core_ocean/shared/mpas_ocn_sea_ice.F index 8caf1129f4..432ea6b5a7 100644 --- a/src/core_ocean/shared/mpas_ocn_sea_ice.F +++ b/src/core_ocean/shared/mpas_ocn_sea_ice.F @@ -136,7 +136,10 @@ subroutine ocn_sea_ice_formation(meshPool, indexTemperature, indexSalinity, laye density_ice = rho_ice - !$omp do schedule(runtime) + !$omp parallel + !$omp do schedule(runtime) & + !$omp private(iCell, maxLevel, netEnergyChange, k, freezingTemp, availableEnergyChange, & + !$omp energyChange, temperatureChange, thicknessChange, iceThicknessChange, iTracer) do iCell = 1, nCellsSolve ! Check performance of these two loop definitions ! do iCell = nCellsSolve, 1, -1 maxLevel = min(maxLevelCell(iCell), verticalLevelCap) @@ -242,6 +245,7 @@ subroutine ocn_sea_ice_formation(meshPool, indexTemperature, indexSalinity, laye end if end do !$omp end do + !$omp end parallel deallocate(iceTracer) diff --git a/src/core_ocean/shared/mpas_ocn_surface_bulk_forcing.F b/src/core_ocean/shared/mpas_ocn_surface_bulk_forcing.F index 71a872bceb..bc3ba013a3 100644 --- a/src/core_ocean/shared/mpas_ocn_surface_bulk_forcing.F +++ b/src/core_ocean/shared/mpas_ocn_surface_bulk_forcing.F @@ -197,6 +197,7 @@ subroutine ocn_surface_bulk_forcing_vel(meshPool, forcingPool, surfaceStress, su nCells = nCellsArray( 3 ) ! Convert coupled climate model wind stress to MPAS-O wind stress + !$omp parallel !$omp do schedule(runtime) private(cell1, cell2, zonalAverage, meridionalAverage) do iEdge = 1, nEdges cell1 = cellsOnEdge(1, iEdge) @@ -217,6 +218,7 @@ subroutine ocn_surface_bulk_forcing_vel(meshPool, forcingPool, surfaceStress, su + windStressMeridional(iCell)**2 ) end do !$omp end do + !$omp end parallel call mpas_timer_stop("bulk_ws") @@ -295,6 +297,7 @@ subroutine ocn_surface_bulk_forcing_thick(meshPool, forcingPool, surfaceThicknes nCells = nCellsArray( 3 ) ! Build surface fluxes at cell centers + !$omp parallel !$omp do schedule(runtime) do iCell = 1, nCells surfaceThicknessFlux(iCell) = surfaceThicknessFlux(iCell) + ( snowFlux(iCell) + rainFlux(iCell) + evaporationFlux(iCell) & @@ -302,6 +305,7 @@ subroutine ocn_surface_bulk_forcing_thick(meshPool, forcingPool, surfaceThicknes surfaceThicknessFluxRunoff(iCell) = riverRunoffFlux(iCell) / rho_sw end do !$omp end do + !$omp end parallel call mpas_timer_stop("bulk_thick") @@ -429,6 +433,7 @@ subroutine ocn_surface_bulk_forcing_active_tracers(meshPool, forcingPool, tracer nCells = nCellsArray( 3 ) ! Build surface fluxes at cell centers + !$omp parallel !$omp do schedule(runtime) private(allowedSalt, requiredSalt) do iCell = 1, nCells tracersSurfaceFlux(index_temperature_flux, iCell) = tracersSurfaceFlux(index_temperature_flux, iCell) & @@ -457,6 +462,7 @@ subroutine ocn_surface_bulk_forcing_active_tracers(meshPool, forcingPool, tracer end if end do !$omp end do + !$omp end parallel ! assume that snow comes in at 0 C ! Surface fluxes of water have an associated heat content, but the coupled system does not account for this @@ -465,6 +471,7 @@ subroutine ocn_surface_bulk_forcing_active_tracers(meshPool, forcingPool, tracer ! Only include this heat forcing when bulk thickness is turned on ! indices on tracerGroup are (iTracer, iLevel, iCell) if (bulkThicknessFluxOn) then + !$omp parallel !$omp do schedule(runtime) do iCell = 1, nCells @@ -488,14 +495,17 @@ subroutine ocn_surface_bulk_forcing_active_tracers(meshPool, forcingPool, tracer end do !$omp end do + !$omp end parallel endif ! bulkThicknessFluxOn ! convert short wave heat flux to a temperature flux + !$omp parallel !$omp do schedule(runtime) do iCell = 1, nCells penetrativeTemperatureFlux(iCell) = shortWaveHeatFlux(iCell) * hflux_factor end do !$omp end do + !$omp end parallel end subroutine ocn_surface_bulk_forcing_active_tracers!}}} diff --git a/src/core_ocean/shared/mpas_ocn_surface_land_ice_fluxes.F b/src/core_ocean/shared/mpas_ocn_surface_land_ice_fluxes.F index 7bb3c9d3da..b6f661851e 100644 --- a/src/core_ocean/shared/mpas_ocn_surface_land_ice_fluxes.F +++ b/src/core_ocean/shared/mpas_ocn_surface_land_ice_fluxes.F @@ -205,6 +205,7 @@ subroutine ocn_surface_land_ice_fluxes_vel(meshPool, diagnosticsPool, surfaceStr call mpas_pool_get_array(diagnosticsPool, 'topDrag', topDrag) call mpas_pool_get_array(diagnosticsPool, 'topDragMagnitude', topDragMagnitude) + !$omp parallel !$omp do schedule(runtime) do iEdge = 1, nEdges surfaceStress(iEdge) = surfaceStress(iEdge) + topDrag(iEdge) @@ -217,6 +218,7 @@ subroutine ocn_surface_land_ice_fluxes_vel(meshPool, diagnosticsPool, surfaceStr surfaceStressMagnitude(iCell) = surfaceStressMagnitude(iCell) + topDragMagnitude(iCell) end do !$omp end do + !$omp end parallel call mpas_timer_stop("top_drag") @@ -284,11 +286,13 @@ subroutine ocn_surface_land_ice_fluxes_thick(meshPool, forcingPool, surfaceThick call mpas_pool_get_array(forcingPool, 'landIceFreshwaterFlux', landIceFreshwaterFlux) ! Build surface fluxes at cell centers + !$omp parallel !$omp do schedule(runtime) do iCell = 1, nCells surfaceThicknessFlux(iCell) = surfaceThicknessFlux(iCell) + landIceFreshwaterFlux(iCell) / rho_sw end do !$omp end do + !$omp end parallel call mpas_timer_stop("land_ice_thick") @@ -352,11 +356,13 @@ subroutine ocn_surface_land_ice_fluxes_active_tracers(meshPool, forcingPool, tra call mpas_pool_get_array(forcingPool, 'landIceHeatFlux', landIceHeatFlux) ! add to surface fluxes at cell centers + !$omp parallel !$omp do schedule(runtime) do iCell = 1, nCells tracersSurfaceFlux(1, iCell) = tracersSurfaceFlux(1, iCell) + landIceHeatFlux(iCell)/(rho_sw*cp_sw) end do !$omp end do + !$omp end parallel end subroutine ocn_surface_land_ice_fluxes_active_tracers!}}} @@ -488,7 +494,8 @@ subroutine ocn_surface_land_ice_fluxes_build_arrays(meshPool, diagnosticsPool, & nCells = nCellsArray( size(nCellsArray) ) if(isomipOn) then - !$omp do schedule(runtime) + !$omp parallel + !$omp do schedule(runtime) private(iCell, freshwaterFlux, heatFlux) do iCell = 1, nCells if (landIceMask(iCell) == 0) cycle @@ -518,6 +525,7 @@ subroutine ocn_surface_land_ice_fluxes_build_arrays(meshPool, diagnosticsPool, & end do !$omp end do + !$omp end parallel end if if(jenkinsOn .or. hollandJenkinsOn) then @@ -771,7 +779,8 @@ subroutine compute_melt_fluxes( & err = 0 Tlatent = latent_heat_fusion_mks/cp_sw - !$omp do schedule(runtime) + !$omp parallel + !$omp do schedule(runtime) private(iCell, T0, dTf_dS, transferVelocityRatio, a, b, c) do iCell = 1, nCells if (mask(iCell) == 0) cycle @@ -815,6 +824,7 @@ subroutine compute_melt_fluxes( & outIceHeatFlux(iCell) = -cp_land_ice*outFreshwaterFlux(iCell)*outInterfaceTemperature(iCell) end do !$omp end do + !$omp end parallel !-------------------------------------------------------------------- @@ -916,7 +926,9 @@ subroutine compute_HJ99_melt_fluxes( & err = 0 cpRatio = cp_land_ice/cp_sw - !$omp do schedule(runtime) reduction(+:err) + !$omp parallel + !$omp do schedule(runtime) reduction(+:err) & + !$omp private(iCell, T0, dTf_dS, transferVelocityRatio, Tlatent, eta, TlatentStar, a, b, c) do iCell = 1, nCells if (mask(iCell) == 0) cycle @@ -958,6 +970,7 @@ subroutine compute_HJ99_melt_fluxes( & outIceHeatFlux(iCell) = -cp_land_ice*outFreshwaterFlux(iCell)*iceTemperature(iCell) end do !$omp end do + !$omp end parallel !-------------------------------------------------------------------- diff --git a/src/core_ocean/shared/mpas_ocn_tendency.F b/src/core_ocean/shared/mpas_ocn_tendency.F index 18b5f0c4fe..de62dec2e5 100644 --- a/src/core_ocean/shared/mpas_ocn_tendency.F +++ b/src/core_ocean/shared/mpas_ocn_tendency.F @@ -153,6 +153,7 @@ subroutine ocn_tend_thick(tendPool, forcingPool, diagnosticsPool, meshPool)!{{{ ! ! height tendency: start accumulating tendency terms ! + !$omp parallel !$omp do schedule(runtime) do iCell = 1, nCells tend_layerThickness(:, iCell) = 0.0_RKIND @@ -160,6 +161,7 @@ subroutine ocn_tend_thick(tendPool, forcingPool, diagnosticsPool, meshPool)!{{{ surfaceThicknessFluxRunoff(iCell) = 0.0_RKIND end do !$omp end do + !$omp end parallel if(config_disable_thick_all_tend) return @@ -296,6 +298,7 @@ subroutine ocn_tend_vel(tendPool, statePool, forcingPool, diagnosticsPool, meshP ! ! velocity tendency: start accumulating tendency terms ! + !$omp parallel !$omp do schedule(runtime) do iEdge = 1, nEdges tend_normalVelocity(:, iEdge) = 0.0_RKIND @@ -308,6 +311,7 @@ subroutine ocn_tend_vel(tendPool, statePool, forcingPool, diagnosticsPool, meshP surfaceStressMagnitude(iCell) = 0.0_RKIND end do !$omp end do + !$omp end parallel if(config_disable_vel_all_tend) return @@ -394,11 +398,13 @@ subroutine ocn_tend_vel(tendPool, statePool, forcingPool, diagnosticsPool, meshP ! ! velocity tendency: zero if drying ! + !$omp parallel !$omp do schedule(runtime) do iEdge = 1, nEdges tend_normalVelocity(:, iEdge) = tend_normalVelocity(:, iEdge) * (1.0_RKIND - wettingVelocity(:, iEdge)) end do !$omp end do + !$omp end parallel ! ! velocity tendency: vertical mixing d/dz( nu_v du/dz)) @@ -590,6 +596,7 @@ subroutine ocn_tend_tracer(tendPool, statePool, forcingPool, diagnosticsPool, me ! ! transport velocity for the tracer. ! + !$omp parallel !$omp do schedule(runtime) private(k) do iEdge = 1, nEdges do k = 1, nVertLevels @@ -597,6 +604,7 @@ subroutine ocn_tend_tracer(tendPool, statePool, forcingPool, diagnosticsPool, me end do end do !$omp end do + !$omp end parallel ! ! begin iterate over tracer categories @@ -654,32 +662,33 @@ subroutine ocn_tend_tracer(tendPool, statePool, forcingPool, diagnosticsPool, me ! ! initialize tracer surface flux and tendency to zero. ! + !$omp parallel !$omp do schedule(runtime) do iCell = 1, nCells tracerGroupTend(:,:, iCell) = 0.0_RKIND tracerGroupSurfaceFlux(:, iCell) = 0.0_RKIND end do !$omp end do + !$omp end parallel ! ! fill components of surface tracer flux ! if (config_use_tracerGroup_surface_bulk_forcing) then + !$omp parallel !$omp do schedule(runtime) do iCell = 1, nCells tracerGroupSurfaceFluxRunoff(:, iCell) = 0.0_RKIND tracerGroupSurfaceFluxRemoved(:, iCell) = 0.0_RKIND end do !$omp end do + !$omp end parallel call ocn_surface_bulk_forcing_tracers(meshPool, groupItr % memberName, forcingPool, tracerGroup, & tracerGroupSurfaceFlux, tracerGroupSurfaceFluxRunoff, & tracerGroupSurfaceFluxRemoved, dt, layerThickness, err) end if - - !$omp master - ! ! compute ecosystem source-sink tendencies and net surface fluxes ! NOTE: must be called before ocn_tracer_surface_flux_tend @@ -725,10 +734,6 @@ subroutine ocn_tend_tracer(tendPool, statePool, forcingPool, diagnosticsPool, me nTracerGroup, nCellsSolve, zMid, indexTemperature, indexSalinity, tracerGroupSurfaceFlux, err) endif - !$omp end master - call mpas_threading_barrier() - - ! ! ocean surface restoring ! if (config_use_tracerGroup_surface_restoring) then @@ -880,11 +885,13 @@ subroutine ocn_tend_tracer(tendPool, statePool, forcingPool, diagnosticsPool, me ! if (config_compute_active_tracer_budgets) then if ( trim(groupItr % memberName) == 'activeTracers' ) then + !$omp parallel !$omp do schedule(runtime) do iCell = 1, nCells activeTracerSurfaceFluxTendency(:,:,iCell) = tracerGroupTend(:,:,iCell) end do !$omp end do + !$omp end parallel endif endif @@ -898,6 +905,7 @@ subroutine ocn_tend_tracer(tendPool, statePool, forcingPool, diagnosticsPool, me if ( trim(groupItr % memberName) == 'activeTracers' ) then if (config_compute_active_tracer_budgets) then + !$omp parallel !$omp do schedule(runtime) do iCell = 1, nCells activeTracerSurfaceFluxTendency(:,:,iCell) = tracerGroupTend(:,:,iCell) & @@ -905,18 +913,21 @@ subroutine ocn_tend_tracer(tendPool, statePool, forcingPool, diagnosticsPool, me temperatureShortWaveTendency(:,iCell) = tracerGroupTend(indexTemperature,:,iCell) end do !$omp end do + !$omp end parallel endif call ocn_tracer_short_wave_absorption_tend(meshPool, swForcingPool, forcingPool, indexTemperature, & layerThickness, penetrativeTemperatureFlux, penetrativeTemperatureFluxOBL, tracerGroupTend, err) if (config_compute_active_tracer_budgets) then + !$omp parallel !$omp do schedule(runtime) do iCell = 1, nCells temperatureShortWaveTendency(:,iCell) = tracerGroupTend(indexTemperature,:,iCell) - & temperatureShortWaveTendency(:,iCell) end do !$omp end do + !$omp end parallel endif endif @@ -929,19 +940,23 @@ subroutine ocn_tend_tracer(tendPool, statePool, forcingPool, diagnosticsPool, me if (.not. config_cvmix_kpp_nonlocal_with_implicit_mix) then if( trim(groupItr % memberName) == 'activeTracers' ) then if (config_compute_active_tracer_budgets) then + !$omp parallel !$omp do schedule(runtime) do iCell = 1, nCells activeTracerNonLocalTendency(:,:,iCell) = tracerGroupTend(:,:,iCell) end do !$omp end do + !$omp end parallel endif call ocn_tracer_nonlocalflux_tend(meshPool, vertNonLocalFlux, nonLocalSurfaceTracerFlux, tracerGroupTend, err) if (config_compute_active_tracer_budgets) then + !$omp parallel !$omp do schedule(runtime) do iCell = 1, nCells activeTracerNonLocalTendency(:,:,iCell) = tracerGroupTend(:,:,iCell)-activeTracerNonLocalTendency(:,:,iCell) end do !$omp end do + !$omp end parallel endif else call ocn_tracer_nonlocalflux_tend(meshPool, vertNonLocalFlux, tracerGroupSurfaceFlux, tracerGroupTend, err) @@ -1045,7 +1060,9 @@ subroutine ocn_tend_freq_filtered_thickness(tendPool, statePool, diagnosticsPool allocate(div_hu(nVertLevels)) - !$omp do schedule(runtime) + !$omp parallel + !$omp do schedule(runtime) & + !$omp private(div_hu, div_hu_btr, invAreaCell, i, iEdge, k, flux, totalThickness) do iCell = 1, nCells tend_lowFreqDivergence(:, iCell) = 0.0_RKIND tend_highFreqThickness(:, iCell) = 0.0_RKIND @@ -1078,6 +1095,7 @@ subroutine ocn_tend_freq_filtered_thickness(tendPool, statePool, diagnosticsPool end do end do !$omp end do + !$omp end parallel deallocate(div_hu) diff --git a/src/core_ocean/shared/mpas_ocn_test.F b/src/core_ocean/shared/mpas_ocn_test.F index 5bb2336c46..d42d5e372a 100644 --- a/src/core_ocean/shared/mpas_ocn_test.F +++ b/src/core_ocean/shared/mpas_ocn_test.F @@ -335,6 +335,7 @@ subroutine ocn_init_gm_test_functions(diagnosticsPool, meshPool, scratchPool)!{{ c1 = R*(1-exp(-zBot/L))/(exp(zBot/L) - exp(-zBot/L)) c2 = R-c1 + !$omp parallel !$omp do schedule(runtime) private(k, zTop) do iCell = 1, nCells @@ -357,6 +358,7 @@ subroutine ocn_init_gm_test_functions(diagnosticsPool, meshPool, scratchPool)!{{ end do !$omp end do + !$omp end parallel !$omp barrier !$omp master diff --git a/src/core_ocean/shared/mpas_ocn_thick_ale.F b/src/core_ocean/shared/mpas_ocn_thick_ale.F index feeb616892..1599bdc18c 100644 --- a/src/core_ocean/shared/mpas_ocn_thick_ale.F +++ b/src/core_ocean/shared/mpas_ocn_thick_ale.F @@ -144,6 +144,7 @@ subroutine ocn_ALE_thickness(meshPool, verticalMeshPool, SSH, ALE_thickness, err ! if (configALEthicknessProportionality==1) then ! restingThickness_times_weights + !$omp parallel !$omp do schedule(runtime) private(kMax, thicknessSum, k) do iCell = 1, nCells kMax = maxLevelCell(iCell) @@ -162,8 +163,10 @@ subroutine ocn_ALE_thickness(meshPool, verticalMeshPool, SSH, ALE_thickness, err end do enddo !$omp end do + !$omp end parallel elseif (configALEthicknessProportionality==2) then ! weights_only + !$omp parallel !$omp do schedule(runtime) private(kMax, weightSum, k) do iCell = 1, nCells kMax = maxLevelCell(iCell) @@ -184,9 +187,11 @@ subroutine ocn_ALE_thickness(meshPool, verticalMeshPool, SSH, ALE_thickness, err end do enddo !$omp end do + !$omp end parallel endif if (thicknessFilterActive) then + !$omp parallel !$omp do schedule(runtime) private(kMax) do iCell = 1, nCells kMax = maxLevelCell(iCell) @@ -196,6 +201,7 @@ subroutine ocn_ALE_thickness(meshPool, verticalMeshPool, SSH, ALE_thickness, err + newHighFreqThickness(1:kMax,iCell) enddo !$omp end do + !$omp end parallel end if ! @@ -203,7 +209,10 @@ subroutine ocn_ALE_thickness(meshPool, verticalMeshPool, SSH, ALE_thickness, err ! if (config_use_min_max_thickness) then - !$omp do schedule(runtime) private(kMax, remainder, k, newThickness) + !$omp parallel + !$omp do schedule(runtime) & + !$omp private(iCell, kMax, prelim_ALE_Thickness, remainder, k, newThickness, & + !$omp min_ALE_thickness_down, min_ALE_thickness_up) do iCell = 1, nCells kMax = maxLevelCell(iCell) @@ -235,6 +244,7 @@ subroutine ocn_ALE_thickness(meshPool, verticalMeshPool, SSH, ALE_thickness, err enddo !$omp end do + !$omp end parallel endif ! config_use_min_max_thickness diff --git a/src/core_ocean/shared/mpas_ocn_thick_hadv.F b/src/core_ocean/shared/mpas_ocn_thick_hadv.F index 0f7ef923c4..72b5f22712 100644 --- a/src/core_ocean/shared/mpas_ocn_thick_hadv.F +++ b/src/core_ocean/shared/mpas_ocn_thick_hadv.F @@ -149,6 +149,7 @@ subroutine ocn_thick_hadv_tend(meshPool, normalVelocity, layerThicknessEdge, ten nCells = nCellsArray( 1 ) + !$omp parallel !$omp do schedule(runtime) private(invAreaCell, i, iEdge, k, flux) do iCell = 1, nCells invAreaCell = 1.0_RKIND / areaCell(iCell) @@ -161,6 +162,7 @@ subroutine ocn_thick_hadv_tend(meshPool, normalVelocity, layerThicknessEdge, ten end do end do !$omp end do + !$omp end parallel call mpas_timer_stop("thick hadv") diff --git a/src/core_ocean/shared/mpas_ocn_thick_surface_flux.F b/src/core_ocean/shared/mpas_ocn_thick_surface_flux.F index 69b82a95ea..1566f29352 100644 --- a/src/core_ocean/shared/mpas_ocn_thick_surface_flux.F +++ b/src/core_ocean/shared/mpas_ocn_thick_surface_flux.F @@ -134,6 +134,7 @@ subroutine ocn_thick_surface_flux_tend(meshPool, transmissionCoefficients, trans nCells = nCellsArray( 1 ) + !$omp parallel !$omp do schedule(runtime) private(remainingFlux, remainingFluxRunoff, k) do iCell = 1, nCells remainingFlux = 1.0_RKIND @@ -156,6 +157,7 @@ subroutine ocn_thick_surface_flux_tend(meshPool, transmissionCoefficients, trans end if end do !$omp end do + !$omp end parallel call mpas_timer_stop("thick surface flux") diff --git a/src/core_ocean/shared/mpas_ocn_thick_vadv.F b/src/core_ocean/shared/mpas_ocn_thick_vadv.F index cbb0cc0f1e..1f6990a076 100644 --- a/src/core_ocean/shared/mpas_ocn_thick_vadv.F +++ b/src/core_ocean/shared/mpas_ocn_thick_vadv.F @@ -133,6 +133,7 @@ subroutine ocn_thick_vadv_tend(meshPool, vertAleTransportTop, tend, err)!{{{ nCells = nCellsArray( 1 ) + !$omp parallel !$omp do schedule(runtime) private(k) do iCell = 1, nCells do k = 1, maxLevelCell(iCell) @@ -140,6 +141,7 @@ subroutine ocn_thick_vadv_tend(meshPool, vertAleTransportTop, tend, err)!{{{ end do end do !$omp end do + !$omp end parallel call mpas_timer_stop("thick vadv") diff --git a/src/core_ocean/shared/mpas_ocn_tidal_forcing.F b/src/core_ocean/shared/mpas_ocn_tidal_forcing.F index 4c47062160..f6fa320581 100644 --- a/src/core_ocean/shared/mpas_ocn_tidal_forcing.F +++ b/src/core_ocean/shared/mpas_ocn_tidal_forcing.F @@ -124,6 +124,7 @@ subroutine ocn_tidal_forcing_layer_thickness(meshPool, forcingPool, layerThickne nCells = nCellsArray( 2 ) ! Build surface fluxes at cell centers + !$omp parallel !$omp do schedule(runtime) private(k) do iCell = 1, nCells do k = 1, maxLevelCell(iCell) @@ -133,6 +134,7 @@ subroutine ocn_tidal_forcing_layer_thickness(meshPool, forcingPool, layerThickne end do end do !$omp end do + !$omp end parallel call mpas_timer_stop("tidal thickness tendency") diff --git a/src/core_ocean/shared/mpas_ocn_time_average_coupled.F b/src/core_ocean/shared/mpas_ocn_time_average_coupled.F index 7facb93861..848fa31ae8 100644 --- a/src/core_ocean/shared/mpas_ocn_time_average_coupled.F +++ b/src/core_ocean/shared/mpas_ocn_time_average_coupled.F @@ -86,6 +86,7 @@ subroutine ocn_time_average_coupled_init(forcingPool)!{{{ call mpas_pool_get_array(forcingPool, 'avgSSHGradient', avgSSHGradient) call mpas_pool_get_array(forcingPool, 'nAccumulatedCoupled', nAccumulatedCoupled) + !$omp parallel !$omp do schedule(runtime) do iCell = 1, nCells avgSurfaceVelocity(:, iCell) = 0.0_RKIND @@ -93,12 +94,14 @@ subroutine ocn_time_average_coupled_init(forcingPool)!{{{ avgSSHGradient(:, iCell) = 0.0_RKIND end do !$omp end do + !$omp end parallel if(trim(config_land_ice_flux_mode) == 'coupled') then call mpas_pool_get_array(forcingPool, 'avgLandIceBoundaryLayerTracers', avgLandIceBoundaryLayerTracers) call mpas_pool_get_array(forcingPool, 'avgLandIceTracerTransferVelocities', avgLandIceTracerTransferVelocities) call mpas_pool_get_array(forcingPool, 'avgEffectiveDensityInLandIce', avgEffectiveDensityInLandIce) + !$omp parallel !$omp do schedule(runtime) do iCell = 1, nCells avgLandIceBoundaryLayerTracers(:, iCell) = 0.0_RKIND @@ -106,6 +109,7 @@ subroutine ocn_time_average_coupled_init(forcingPool)!{{{ avgEffectiveDensityInLandIce(iCell) = 0.0_RKIND end do !$omp end do + !$omp end parallel end if ! set up BGC coupling fields if necessary @@ -125,6 +129,7 @@ subroutine ocn_time_average_coupled_init(forcingPool)!{{{ call mpas_pool_get_array(ecosysSeaIceCoupling, 'avgOceanSurfaceFeParticulate', avgOceanSurfaceFeParticulate) call mpas_pool_get_array(ecosysSeaIceCoupling, 'avgOceanSurfaceFeDissolved', avgOceanSurfaceFeDissolved) + !$omp parallel !$omp do schedule(runtime) do iCell = 1, nCells avgCO2_gas_flux(iCell) = 0.0_RKIND @@ -140,6 +145,7 @@ subroutine ocn_time_average_coupled_init(forcingPool)!{{{ avgOceanSurfaceFeDissolved(iCell) = 0.0_RKIND end do !$omp end do + !$omp end parallel end if if (config_use_DMSTracers) then @@ -148,12 +154,14 @@ subroutine ocn_time_average_coupled_init(forcingPool)!{{{ call mpas_pool_get_array(DMSSeaIceCoupling, 'avgOceanSurfaceDMS', avgOceanSurfaceDMS) call mpas_pool_get_array(DMSSeaIceCoupling, 'avgOceanSurfaceDMSP', avgOceanSurfaceDMSP) + !$omp parallel !$omp do schedule(runtime) do iCell = 1, nCells avgOceanSurfaceDMS(iCell) = 0.0_RKIND avgOceanSurfaceDMSP(iCell) = 0.0_RKIND end do !$omp end do + !$omp end parallel endif if (config_use_MacroMoleculesTracers) then call mpas_pool_get_subpool(forcingPool, 'MacroMoleculesSeaIceCoupling', MacroMoleculesSeaIceCoupling) @@ -161,21 +169,18 @@ subroutine ocn_time_average_coupled_init(forcingPool)!{{{ call mpas_pool_get_array(MacroMoleculesSeaIceCoupling, 'avgOceanSurfaceDOC', avgOceanSurfaceDOC) call mpas_pool_get_array(MacroMoleculesSeaIceCoupling, 'avgOceanSurfaceDON', avgOceanSurfaceDON) + !$omp parallel !$omp do schedule(runtime) do iCell = 1, nCells avgOceanSurfaceDOC(:,iCell) = 0.0_RKIND avgOceanSurfaceDON(iCell) = 0.0_RKIND end do !$omp end do + !$omp end parallel endif - !$omp master - nAccumulatedCoupled = 0 - !$omp end master - call mpas_threading_barrier() - end subroutine ocn_time_average_coupled_init!}}} !*********************************************************************** @@ -249,6 +254,7 @@ subroutine ocn_time_average_coupled_accumulate(diagnosticsPool, statePool, forci call mpas_pool_get_array(forcingPool, 'nAccumulatedCoupled', nAccumulatedCoupled) + !$omp parallel !$omp do schedule(runtime) do iCell = 1, nCells avgTracersSurfaceValue(:, iCell) = avgTracersSurfaceValue(:, iCell) * nAccumulatedCoupled & @@ -264,6 +270,7 @@ subroutine ocn_time_average_coupled_accumulate(diagnosticsPool, statePool, forci / ( nAccumulatedCoupled + 1 ) end do !$omp end do + !$omp end parallel if(trim(config_land_ice_flux_mode) == 'coupled') then call mpas_pool_get_array(diagnosticsPool, 'landIceBoundaryLayerTracers', landIceBoundaryLayerTracers) @@ -274,6 +281,7 @@ subroutine ocn_time_average_coupled_accumulate(diagnosticsPool, statePool, forci call mpas_pool_get_array(forcingPool, 'avgLandIceTracerTransferVelocities', avgLandIceTracerTransferVelocities) call mpas_pool_get_array(forcingPool, 'avgEffectiveDensityInLandIce', avgEffectiveDensityInLandIce) + !$omp parallel !$omp do schedule(runtime) do iCell = 1, nCells avgLandIceBoundaryLayerTracers(:, iCell) = ( avgLandIceBoundaryLayerTracers(:, iCell) * nAccumulatedCoupled & @@ -284,6 +292,7 @@ subroutine ocn_time_average_coupled_accumulate(diagnosticsPool, statePool, forci + effectiveDensityInLandIce(iCell) ) / ( nAccumulatedCoupled + 1) end do !$omp end do + !$omp end parallel end if ! accumulate BGC coupling fields if necessary @@ -307,6 +316,7 @@ subroutine ocn_time_average_coupled_accumulate(diagnosticsPool, statePool, forci call mpas_pool_get_subpool(statePool, 'tracers', tracersPool) call mpas_pool_get_array(tracersPool, 'ecosysTracers', ecosysTracers, 1) + !$omp parallel !$omp do schedule(runtime) do iCell = 1, nCells @@ -338,6 +348,7 @@ subroutine ocn_time_average_coupled_accumulate(diagnosticsPool, statePool, forci end do !$omp end do + !$omp end parallel end if if (config_use_DMSTracers) then @@ -349,6 +360,7 @@ subroutine ocn_time_average_coupled_accumulate(diagnosticsPool, statePool, forci call mpas_pool_get_subpool(statePool, 'tracers', tracersPool) call mpas_pool_get_array(tracersPool, 'DMSTracers', DMSTracers, 1) + !$omp parallel !$omp do schedule(runtime) do iCell = 1, nCells avgOceanSurfaceDMS(iCell) = ( avgOceanSurfaceDMS(iCell) * nAccumulatedCoupled & @@ -357,6 +369,7 @@ subroutine ocn_time_average_coupled_accumulate(diagnosticsPool, statePool, forci + DMSTracers(dmsIndices%dmsp_ind,1,iCell) ) / ( nAccumulatedCoupled + 1) end do !$omp end do + !$omp end parallel endif if (config_use_MacroMoleculesTracers) then @@ -368,6 +381,7 @@ subroutine ocn_time_average_coupled_accumulate(diagnosticsPool, statePool, forci call mpas_pool_get_subpool(statePool, 'tracers', tracersPool) call mpas_pool_get_array(tracersPool, 'MacroMoleculesTracers', MacroMoleculesTracers, 1) + !$omp parallel !$omp do schedule(runtime) do iCell = 1, nCells avgOceanSurfaceDOC(1,iCell) = ( avgOceanSurfaceDOC(1,iCell) * nAccumulatedCoupled & @@ -379,15 +393,11 @@ subroutine ocn_time_average_coupled_accumulate(diagnosticsPool, statePool, forci + MacroMoleculesTracers(macrosIndices%prot_ind,1,iCell) ) / ( nAccumulatedCoupled + 1) end do !$omp end do + !$omp end parallel endif - !$omp master - nAccumulatedCoupled = nAccumulatedCoupled + 1 - !$omp end master - call mpas_threading_barrier() - end subroutine ocn_time_average_coupled_accumulate!}}} end module ocn_time_average_coupled diff --git a/src/core_ocean/shared/mpas_ocn_time_varying_forcing.F b/src/core_ocean/shared/mpas_ocn_time_varying_forcing.F index e1826d6298..8142e271ce 100644 --- a/src/core_ocean/shared/mpas_ocn_time_varying_forcing.F +++ b/src/core_ocean/shared/mpas_ocn_time_varying_forcing.F @@ -366,6 +366,7 @@ subroutine post_atmospheric_forcing(block)!{{{ ramp = tanh((2.0_RKIND*daysSinceStartOfSim)/config_time_varying_atmospheric_forcing_ramp) + !$omp parallel !$omp do schedule(runtime) private(iCell, windStressCoefficient) do iCell = 1, nCells @@ -384,6 +385,7 @@ subroutine post_atmospheric_forcing(block)!{{{ enddo !$omp end do + !$omp end parallel end subroutine post_atmospheric_forcing!}}} diff --git a/src/core_ocean/shared/mpas_ocn_tracer_TTD.F b/src/core_ocean/shared/mpas_ocn_tracer_TTD.F index 86365b4c1b..2d9f0a3395 100644 --- a/src/core_ocean/shared/mpas_ocn_tracer_TTD.F +++ b/src/core_ocean/shared/mpas_ocn_tracer_TTD.F @@ -120,11 +120,13 @@ subroutine ocn_tracer_TTD_compute(nTracers, nCellsSolve, maxLevelCell, layerThic ! zero tracers at surface to TTDMask at top-most layer ! TTDMask should be 1 within region of interest and zero elsewhere + !$omp parallel !$omp do schedule(runtime) do iCell = 1, nCellsSolve tracers(:, 1, iCell) = TTDMask(:, iCell) end do !$omp end do + !$omp end parallel !-------------------------------------------------------------------- diff --git a/src/core_ocean/shared/mpas_ocn_tracer_advection_mono.F b/src/core_ocean/shared/mpas_ocn_tracer_advection_mono.F index 9efee519d5..bfaf5196f3 100644 --- a/src/core_ocean/shared/mpas_ocn_tracer_advection_mono.F +++ b/src/core_ocean/shared/mpas_ocn_tracer_advection_mono.F @@ -230,7 +230,6 @@ subroutine ocn_tracer_advection_mono_tend(tend, tracers, layerThickness, & nEdges = nEdgesArray(size(nEdgesArray)) ! allocate temporary arrays - !$omp master allocate(tracerCur (nVertLevels ,nCells+1), & tracerMin (nVertLevels ,nCells), & tracerMax (nVertLevels ,nCells), & @@ -242,8 +241,6 @@ subroutine ocn_tracer_advection_mono_tend(tend, tracers, layerThickness, & workTend (nVertLevels ,nCells+1), & lowOrderFlx (nVertLevels+1,max(nCells,nEdges)+1), & highOrderFlx(nVertLevels+1,max(nCells,nEdges)+1)) - !$omp end master - !$omp barrier ! Compute some provisional layer thicknesses ! Note: This assumes we are in the first part of the horizontal/ @@ -251,7 +248,8 @@ subroutine ocn_tracer_advection_mono_tend(tend, tracers, layerThickness, & ! we dont flip order and horizontal is always first. ! See notes in commit 2cd4a89d. - !$omp do schedule(runtime) + !$omp parallel + !$omp do schedule(runtime) private(iCell, invAreaCell1, kmax, k, i, iEdge, signedFactor) do iCell = 1, nCells invAreaCell1 = dt/areaCell(iCell) kmax = maxLevelCell(iCell) @@ -275,6 +273,7 @@ subroutine ocn_tracer_advection_mono_tend(tend, tracers, layerThickness, & end do end do !$omp end do + !$omp end parallel #ifdef _ADV_TIMERS call mpas_timer_stop('startup') @@ -290,13 +289,15 @@ subroutine ocn_tracer_advection_mono_tend(tend, tracers, layerThickness, & nCells = nCellsArray( size(nCellsArray) ) ! Extract current tracer and change index order to improve locality - !$omp do schedule(runtime) + !$omp parallel + !$omp do schedule(runtime) private(k) do iCell = 1, nCells+1 do k=1, nVertLevels tracerCur(k,iCell) = tracers(iTracer,k,iCell) end do ! k loop end do ! iCell loop !$omp end do + !$omp end parallel ! Compute the high and low order horizontal fluxes. #ifdef _ADV_TIMERS @@ -310,7 +311,8 @@ subroutine ocn_tracer_advection_mono_tend(tend, tracers, layerThickness, & ! Determine bounds on tracer (tracerMin and tracerMax) from ! surrounding cells for later limiting. - !$omp do schedule(runtime) + !$omp parallel + !$omp do schedule(runtime) private(iCell, k, i, cell2, kmax) do iCell = 1, nCells do k=1, maxLevelCell(iCell) tracerMin(k,iCell) = tracerCur(k,iCell) @@ -329,13 +331,17 @@ subroutine ocn_tracer_advection_mono_tend(tend, tracers, layerThickness, & end do ! i loop over nEdgesOnCell end do !$omp end do + !$omp end parallel ! Need all the edges around the 1 halo cells and owned cells nEdges = nEdgesArray( 3 ) ! Compute the high order horizontal flux - !$omp do schedule(runtime) + !$omp parallel + !$omp do schedule(runtime) & + !$omp private(iEdge, cell1, cell2, k, wgtTmp, sgnTmp, flxTmp, i, iCell, coef1, coef3, & + !$omp tracerWeight) do iEdge = 1, nEdges cell1 = cellsOnEdge(1, iEdge) cell2 = cellsOnEdge(2, iEdge) @@ -386,6 +392,8 @@ subroutine ocn_tracer_advection_mono_tend(tend, tracers, layerThickness, & end do ! k loop end do ! iEdge loop !$omp end do + !$omp end parallel + #ifdef _ADV_TIMERS call mpas_timer_stop('horiz flux') call mpas_timer_start('scale factor build') @@ -393,6 +401,7 @@ subroutine ocn_tracer_advection_mono_tend(tend, tracers, layerThickness, & ! Initialize flux arrays for all cells nCells = nCellsArray(size(nCellsArray)) + !$omp parallel !$omp do schedule(runtime) do iCell = 1, nCells+1 do k=1, nVertLevels @@ -402,11 +411,15 @@ subroutine ocn_tracer_advection_mono_tend(tend, tracers, layerThickness, & end do ! k loop end do ! iCell loop !$omp end do + !$omp end parallel ! Need one halo of cells around owned cells nCells = nCellsArray( 2 ) - !$omp do schedule(runtime) + !$omp parallel + !$omp do schedule(runtime) & + !$omp private(iCell, invAreaCell1, i, iEdge, cell1, cell2, signedFactor, k, & + !$omp tracerUpwindNew, tracerMinNew, tracerMaxNew, scaleFactor) do iCell = 1, nCells invAreaCell1 = 1.0_RKIND / areaCell(iCell) @@ -456,6 +469,8 @@ subroutine ocn_tracer_advection_mono_tend(tend, tracers, layerThickness, & end do ! k loop end do ! iCell loop !$omp end do + !$omp end parallel + #ifdef _ADV_TIMERS call mpas_timer_stop('scale factor build') call mpas_timer_start('rescale horiz fluxes') @@ -463,6 +478,7 @@ subroutine ocn_tracer_advection_mono_tend(tend, tracers, layerThickness, & ! Need all of the edges around owned cells nEdges = nEdgesArray( 2 ) ! rescale the high order horizontal fluxes + !$omp parallel !$omp do schedule(runtime) private(cell1, cell2, k, flux) do iEdge = 1, nEdges cell1 = cellsOnEdge(1,iEdge) @@ -475,6 +491,7 @@ subroutine ocn_tracer_advection_mono_tend(tend, tracers, layerThickness, & end do ! k loop end do ! iEdge loop !$omp end do + !$omp end parallel #ifdef _ADV_TIMERS call mpas_timer_stop('rescale horiz fluxes') call mpas_timer_start('flux accumulate') @@ -482,6 +499,7 @@ subroutine ocn_tracer_advection_mono_tend(tend, tracers, layerThickness, & nCells = nCellsArray( 1 ) ! Accumulate the scaled high order vertical tendencies, and the upwind tendencies + !$omp parallel !$omp do schedule(runtime) private(invAreaCell1, signedFactor, i, iEdge, flux, k) do iCell = 1, nCells invAreaCell1 = 1.0_RKIND / areaCell(iCell) @@ -508,6 +526,7 @@ subroutine ocn_tracer_advection_mono_tend(tend, tracers, layerThickness, & end do ! iCell loop !$omp end do + !$omp end parallel #ifdef _ADV_TIMERS call mpas_timer_stop('flux accumulate') @@ -516,7 +535,8 @@ subroutine ocn_tracer_advection_mono_tend(tend, tracers, layerThickness, & ! Compute budget and monotonicity diagnostics if needed if (computeBudgets) then - !$omp do schedule(runtime) + !$omp parallel + !$omp do schedule(runtime) private(iEdge, k) do iEdge = 1,nEdgesArray( 2 ) do k = 1,maxLevelEdgeTop(iEdge) ! Save u*h*T flux on edge for analysis. This variable will be @@ -527,7 +547,7 @@ subroutine ocn_tracer_advection_mono_tend(tend, tracers, layerThickness, & enddo !$omp end do - !$omp do schedule(runtime) + !$omp do schedule(runtime) private(iCell, k) do iCell = 1, nCellsArray( 1 ) do k = 1, maxLevelCell(iCell) activeTracerHorizontalAdvectionTendency(iTracer,k,iCell) = & @@ -535,6 +555,7 @@ subroutine ocn_tracer_advection_mono_tend(tend, tracers, layerThickness, & end do end do ! iCell loop !$omp end do + !$omp end parallel end if ! computeBudgets @@ -542,7 +563,8 @@ subroutine ocn_tracer_advection_mono_tend(tend, tracers, layerThickness, & nCells = nCellsArray( 1 ) ! Check tracer values against local min,max to detect ! non-monotone values and write warning if found - !$omp do schedule(runtime) + !$omp parallel + !$omp do schedule(runtime) private(iCell, k) do iCell = 1, nCells do k = 1, maxLevelCell(iCell) if(tracerCur(k,iCell) < tracerMin(k, iCell)-eps) then @@ -561,6 +583,7 @@ subroutine ocn_tracer_advection_mono_tend(tend, tracers, layerThickness, & end do end do !$omp end do + !$omp end parallel end if ! monotonicity check #ifdef _ADV_TIMERS call mpas_timer_stop('advect diags') @@ -578,6 +601,7 @@ subroutine ocn_tracer_advection_mono_tend(tend, tracers, layerThickness, & #endif nCells = nCellsArray( size(nCellsArray) ) ! Initialize variables for use in this iTracer iteration + !$omp parallel !$omp do schedule(runtime) private(k) do iCell = 1, nCells do k=1, nVertLevels @@ -588,6 +612,7 @@ subroutine ocn_tracer_advection_mono_tend(tend, tracers, layerThickness, & end do ! iCell loop !$omp end do + !$omp end parallel #ifdef _ADV_TIMERS call mpas_timer_stop('cell init') #endif @@ -600,7 +625,8 @@ subroutine ocn_tracer_advection_mono_tend(tend, tracers, layerThickness, & nCells = nCellsArray( 2 ) ! Determine bounds on tracerCur from neighbor values for limiting - !$omp do schedule(runtime) + !$omp parallel + !$omp do schedule(runtime) private(iCell, kmax, k) do iCell = 1, nCells kmax = maxLevelCell(iCell) @@ -624,6 +650,7 @@ subroutine ocn_tracer_advection_mono_tend(tend, tracers, layerThickness, & tracerCur(kmax-1,iCell)) end do ! cell loop !$omp end do + !$omp end parallel ! Compute the high order vertical fluxes based on order selected. ! Special cases for top and bottom layers handled in following loop. @@ -631,7 +658,8 @@ subroutine ocn_tracer_advection_mono_tend(tend, tracers, layerThickness, & select case (vertOrder) case (vertOrder4) - !$omp do schedule(runtime) + !$omp parallel + !$omp do schedule(runtime) private(iCell, kmax, k) do iCell = 1, nCells kmax = maxLevelCell(iCell) do k=3,kmax-1 @@ -643,10 +671,12 @@ subroutine ocn_tracer_advection_mono_tend(tend, tracers, layerThickness, & end do end do ! cell loop !$omp end do + !$omp end parallel case (vertOrder3) - !$omp do schedule(runtime) + !$omp parallel + !$omp do schedule(runtime) private(iCell, kmax, k) do iCell = 1, nCells kmax = maxLevelCell(iCell) do k=3,kmax-1 @@ -662,10 +692,12 @@ subroutine ocn_tracer_advection_mono_tend(tend, tracers, layerThickness, & end do end do ! cell loop !$omp end do + !$omp end parallel case (vertOrder2) - !$omp do schedule(runtime) + !$omp parallel + !$omp do schedule(runtime) private(iCell, kmax, k, verticalWeightK, verticalWeightKm1) do iCell = 1, nCells kmax = maxLevelCell(iCell) do k=3,kmax-1 @@ -679,6 +711,7 @@ subroutine ocn_tracer_advection_mono_tend(tend, tracers, layerThickness, & end do end do ! cell loop !$omp end do + !$omp end parallel end select ! vertical order @@ -686,7 +719,8 @@ subroutine ocn_tracer_advection_mono_tend(tend, tracers, layerThickness, & ! Compute low order upwind vertical flux (monotonic and diffused) ! Remove low order flux from the high order flux. ! Store left over high order flux in highOrderFlx array. - !$omp do schedule(runtime) + !$omp parallel + !$omp do schedule(runtime) private(iCell, kmax, verticalWeightK, verticalWeightKm1, k) do iCell = 1, nCells kmax = maxLevelCell(iCell) ! Next-to-top cell in column is second-order @@ -737,6 +771,7 @@ subroutine ocn_tracer_advection_mono_tend(tend, tracers, layerThickness, & end do ! k Loop end do ! iCell Loop !$omp end do + !$omp end parallel #ifdef _ADV_TIMERS call mpas_timer_stop('vertical flux') @@ -744,7 +779,9 @@ subroutine ocn_tracer_advection_mono_tend(tend, tracers, layerThickness, & #endif ! Need one halo of cells around owned cells nCells = nCellsArray( 2 ) - !$omp do schedule(runtime) + !$omp parallel + !$omp do schedule(runtime) & + !$omp private(iCell, k, tracerMinNew, tracerMaxNew, tracerUpwindNew, scaleFactor) do iCell = 1, nCells ! Build the scale factors to limit flux for FCT @@ -773,6 +810,7 @@ subroutine ocn_tracer_advection_mono_tend(tend, tracers, layerThickness, & end do ! k loop end do ! iCell loop !$omp end do + !$omp end parallel #ifdef _ADV_TIMERS call mpas_timer_stop('scale factor build') call mpas_timer_start('flux accumulate') @@ -781,7 +819,8 @@ subroutine ocn_tracer_advection_mono_tend(tend, tracers, layerThickness, & nCells = nCellsArray( 1 ) ! Accumulate the scaled high order vertical tendencies ! and the upwind tendencies - !$omp do schedule(runtime) + !$omp parallel + !$omp do schedule(runtime) private(iCell, kmax, k, flux) do iCell = 1, nCells kmax = maxLevelCell(iCell) ! rescale the high order vertical flux @@ -806,6 +845,7 @@ subroutine ocn_tracer_advection_mono_tend(tend, tracers, layerThickness, & end do ! iCell loop !$omp end do + !$omp end parallel ! Compute advection diagnostics and monotonicity checks if requested #ifdef _ADV_TIMERS @@ -813,7 +853,8 @@ subroutine ocn_tracer_advection_mono_tend(tend, tracers, layerThickness, & call mpas_timer_start('advect diags') #endif if (computeBudgets) then - !$omp do schedule(runtime) + !$omp parallel + !$omp do schedule(runtime) private(iCell, k) do iCell = 1, nCells do k = 2, maxLevelCell(iCell) activeTracerVerticalAdvectionTopFlux(iTracer,k,iCell) = & @@ -825,12 +866,14 @@ subroutine ocn_tracer_advection_mono_tend(tend, tracers, layerThickness, & end do end do ! iCell loop !$omp end do + !$omp end parallel end if ! computeBudgets if (monotonicityCheck) then nCells = nCellsArray( 1 ) ! Check for monotonicity of new tracer value - !$omp do schedule(runtime) + !$omp parallel + !$omp do schedule(runtime) private(iCell, k, tracerNew) do iCell = 1, nCells do k = 1, maxLevelCell(iCell) ! workTend on the RHS is the total vertical advection tendency @@ -853,6 +896,7 @@ subroutine ocn_tracer_advection_mono_tend(tend, tracers, layerThickness, & end do end do !$omp end do + !$omp end parallel end if ! monotonicity check #ifdef _ADV_TIMERS call mpas_timer_stop('advect diags') @@ -862,8 +906,6 @@ subroutine ocn_tracer_advection_mono_tend(tend, tracers, layerThickness, & #ifdef _ADV_TIMERS call mpas_timer_start('deallocates') #endif - !$omp barrier - !$omp master deallocate(tracerCur, & tracerMin, & tracerMax, & @@ -875,7 +917,6 @@ subroutine ocn_tracer_advection_mono_tend(tend, tracers, layerThickness, & workTend, & lowOrderFlx, & highOrderFlx) - !$omp end master #ifdef _ADV_TIMERS call mpas_timer_stop('deallocates') diff --git a/src/core_ocean/shared/mpas_ocn_tracer_advection_std.F b/src/core_ocean/shared/mpas_ocn_tracer_advection_std.F index 3607ec19a4..6c24b5e420 100644 --- a/src/core_ocean/shared/mpas_ocn_tracer_advection_std.F +++ b/src/core_ocean/shared/mpas_ocn_tracer_advection_std.F @@ -139,6 +139,7 @@ subroutine ocn_tracer_advection_std_tend(tracers, adv_coefs, adv_coefs_3rd, nAdv ! Loop over tracers. One tracer is advected at a time. It is copied into a temporary array in order to improve locality do iTracer = 1, num_tracers ! Initialize variables for use in this iTracer iteration + !$omp parallel !$omp do schedule(runtime) do iCell = 1, nCells tracer_cur(:, iCell) = tracers(iTracer, :, iCell) @@ -236,6 +237,7 @@ subroutine ocn_tracer_advection_std_tend(tracers, adv_coefs, adv_coefs_3rd, nAdv end do ! k loop end do ! iCell loop !$omp end do + !$omp end parallel end do ! iTracer loop !$omp barrier diff --git a/src/core_ocean/shared/mpas_ocn_tracer_exponential_decay.F b/src/core_ocean/shared/mpas_ocn_tracer_exponential_decay.F index 2c31edc904..63379c8b2a 100644 --- a/src/core_ocean/shared/mpas_ocn_tracer_exponential_decay.F +++ b/src/core_ocean/shared/mpas_ocn_tracer_exponential_decay.F @@ -119,6 +119,7 @@ subroutine ocn_tracer_exponential_decay_compute(nTracers, nCellsSolve, maxLevelC err = 0 + !$omp parallel !$omp do schedule(runtime) private(iLevel, iTracer) do iCell=1,nCellsSolve do iLevel=1,maxLevelCell(iCell) @@ -131,6 +132,7 @@ subroutine ocn_tracer_exponential_decay_compute(nTracers, nCellsSolve, maxLevelC enddo enddo !$omp end do + !$omp end parallel !-------------------------------------------------------------------- diff --git a/src/core_ocean/shared/mpas_ocn_tracer_hmix_del2.F b/src/core_ocean/shared/mpas_ocn_tracer_hmix_del2.F index f9fe0e1d2d..4b6eafd199 100644 --- a/src/core_ocean/shared/mpas_ocn_tracer_hmix_del2.F +++ b/src/core_ocean/shared/mpas_ocn_tracer_hmix_del2.F @@ -153,7 +153,10 @@ subroutine ocn_tracer_hmix_del2_tend(meshPool, layerThicknessEdge, tracers, tend ! ! compute a boundary mask to enforce insulating boundary conditions in the horizontal ! - !$omp do schedule(runtime) + !$omp parallel + !$omp do schedule(runtime) & + !$omp private(iCell, invAreaCell, i, iEdge, cell1, cell2, r_tmp, k, iTracer, & + !$omp tracer_turb_flux, flux) do iCell = 1, nCells invAreaCell = 1.0_RKIND / areaCell(iCell) do i = 1, nEdgesOnCell(iCell) @@ -178,6 +181,7 @@ subroutine ocn_tracer_hmix_del2_tend(meshPool, layerThicknessEdge, tracers, tend end do end do !$omp end do + !$omp end parallel call mpas_timer_stop("tracer del2") diff --git a/src/core_ocean/shared/mpas_ocn_tracer_hmix_del4.F b/src/core_ocean/shared/mpas_ocn_tracer_hmix_del4.F index b0f23851ee..016dbcd474 100644 --- a/src/core_ocean/shared/mpas_ocn_tracer_hmix_del4.F +++ b/src/core_ocean/shared/mpas_ocn_tracer_hmix_del4.F @@ -182,6 +182,7 @@ subroutine ocn_tracer_hmix_del4_tend(meshPool, layerThicknessEdge, tracers, tend nCells = nCellsArray( 2 ) ! first del2: div(h \nabla \phi) at cell center + !$omp parallel !$omp do schedule(runtime) private(invAreaCell1, i, iEdge, invdcEdge, cell1, cell2, k, iTracer, r_tmp1, r_tmp2) do iCell = 1, nCells delsq_tracer(:, :, iCell) = 0.0_RKIND @@ -205,11 +206,13 @@ subroutine ocn_tracer_hmix_del4_tend(meshPool, layerThicknessEdge, tracers, tend end do end do !$omp end do + !$omp end parallel ! Only need tendency on owned cells nCells = nCellsArray( 1 ) ! second del2: div(h \nabla [delsq_tracer]) at cell center + !$omp parallel !$omp do schedule(runtime) private(invAreaCell1, i, iEdge, cell1, cell2, invdcEdge, k, iTracer, tracer_turb_flux, flux) do iCell = 1, nCells invAreaCell1 = 1.0_RKIND / areaCell(iCell) @@ -232,6 +235,7 @@ subroutine ocn_tracer_hmix_del4_tend(meshPool, layerThicknessEdge, tracers, tend end do end do !$omp end do + !$omp end parallel !$omp barrier !$omp master diff --git a/src/core_ocean/shared/mpas_ocn_tracer_ideal_age.F b/src/core_ocean/shared/mpas_ocn_tracer_ideal_age.F index fbcd0ae231..8188601981 100644 --- a/src/core_ocean/shared/mpas_ocn_tracer_ideal_age.F +++ b/src/core_ocean/shared/mpas_ocn_tracer_ideal_age.F @@ -118,6 +118,7 @@ subroutine ocn_tracer_ideal_age_compute(nTracers, nCellsSolve, maxLevelCell, lay err = 0 + !$omp parallel !$omp do schedule(runtime) private(iLevel, iTracer) do iCell=1,nCellsSolve do iLevel=1,maxLevelCell(iCell) @@ -133,6 +134,7 @@ subroutine ocn_tracer_ideal_age_compute(nTracers, nCellsSolve, maxLevelCell, lay enddo enddo !$omp end do + !$omp end parallel !-------------------------------------------------------------------- diff --git a/src/core_ocean/shared/mpas_ocn_tracer_interior_restoring.F b/src/core_ocean/shared/mpas_ocn_tracer_interior_restoring.F index 6ec9612057..5b5c49f2a9 100644 --- a/src/core_ocean/shared/mpas_ocn_tracer_interior_restoring.F +++ b/src/core_ocean/shared/mpas_ocn_tracer_interior_restoring.F @@ -118,6 +118,7 @@ subroutine ocn_tracer_interior_restoring_compute(nTracers, nCellsSolve, maxLevel err = 0 + !$omp parallel !$omp do schedule(runtime) private(iLevel, iTracer) do iCell=1,nCellsSolve do iLevel=1,maxLevelCell(iCell) @@ -130,6 +131,7 @@ subroutine ocn_tracer_interior_restoring_compute(nTracers, nCellsSolve, maxLevel enddo enddo !$omp end do + !$omp end parallel !-------------------------------------------------------------------- diff --git a/src/core_ocean/shared/mpas_ocn_tracer_nonlocalflux.F b/src/core_ocean/shared/mpas_ocn_tracer_nonlocalflux.F index f5054cb676..3d16139d40 100644 --- a/src/core_ocean/shared/mpas_ocn_tracer_nonlocalflux.F +++ b/src/core_ocean/shared/mpas_ocn_tracer_nonlocalflux.F @@ -129,6 +129,7 @@ subroutine ocn_tracer_nonlocalflux_tend(meshPool, vertNonLocalFlux, surfaceTrace nCells = nCellsArray( 1 ) + !$omp parallel !$omp do schedule(runtime) private(k, iTracer, fluxTopOfCell, fluxBottomOfCell) do iCell = 1, nCells do k = 2, maxLevelCell(iCell)-1 @@ -159,6 +160,7 @@ subroutine ocn_tracer_nonlocalflux_tend(meshPool, vertNonLocalFlux, surfaceTrace end do !$omp end do + !$omp end parallel call mpas_timer_stop('non-local flux') diff --git a/src/core_ocean/shared/mpas_ocn_tracer_short_wave_absorption_jerlov.F b/src/core_ocean/shared/mpas_ocn_tracer_short_wave_absorption_jerlov.F index 6f3c38357e..70470557a5 100644 --- a/src/core_ocean/shared/mpas_ocn_tracer_short_wave_absorption_jerlov.F +++ b/src/core_ocean/shared/mpas_ocn_tracer_short_wave_absorption_jerlov.F @@ -144,7 +144,8 @@ subroutine ocn_tracer_short_wave_absorption_jerlov_tend(meshPool, forcingPool, i nCells = nCellsArray( 3 ) - !$omp do schedule(runtime) + !$omp parallel + !$omp do schedule(runtime) private(iCell, depth, k, weights, depLev) do iCell = 1, nCells depth = 0.0_RKIND do k = 1, maxLevelCell(iCell) @@ -170,6 +171,7 @@ subroutine ocn_tracer_short_wave_absorption_jerlov_tend(meshPool, forcingPool, i end do !$omp end do + !$omp end parallel deallocate(weights) diff --git a/src/core_ocean/shared/mpas_ocn_tracer_short_wave_absorption_variable.F b/src/core_ocean/shared/mpas_ocn_tracer_short_wave_absorption_variable.F index 5611ddc129..56e2c6bac2 100644 --- a/src/core_ocean/shared/mpas_ocn_tracer_short_wave_absorption_variable.F +++ b/src/core_ocean/shared/mpas_ocn_tracer_short_wave_absorption_variable.F @@ -153,7 +153,8 @@ subroutine ocn_tracer_short_wave_absorption_variable_tend(meshPool, swForcingPoo nCells = nCellsArray( 3 ) - !$omp do schedule(runtime) + !$omp parallel + !$omp do schedule(runtime) private(iCell, depth, cloudRatio, k, depLev) do iCell = 1, nCells depth = 0.0_RKIND cloudRatio = min(1.0_RKIND, 1.0_RKIND - penetrativeTemperatureFlux(iCell)/(hflux_factor*(1.0E-15_RKIND + & @@ -185,6 +186,7 @@ subroutine ocn_tracer_short_wave_absorption_variable_tend(meshPool, swForcingPoo end do !$omp end do + !$omp end parallel deallocate(weights) diff --git a/src/core_ocean/shared/mpas_ocn_tracer_surface_flux_to_tend.F b/src/core_ocean/shared/mpas_ocn_tracer_surface_flux_to_tend.F index eabc62e02c..4960fe8405 100644 --- a/src/core_ocean/shared/mpas_ocn_tracer_surface_flux_to_tend.F +++ b/src/core_ocean/shared/mpas_ocn_tracer_surface_flux_to_tend.F @@ -141,6 +141,7 @@ subroutine ocn_tracer_surface_flux_tend(meshPool, fractionAbsorbed, fractionAbso nCells = nCellsArray( 1 ) + !$omp parallel !$omp do schedule(runtime) private(remainingFlux, k, iTracer) do iCell = 1, nCells remainingFlux = 1.0_RKIND @@ -160,6 +161,7 @@ subroutine ocn_tracer_surface_flux_tend(meshPool, fractionAbsorbed, fractionAbso end if end do !$omp end do + !$omp end parallel call mpas_timer_stop("surface_tracer_flux") @@ -168,6 +170,7 @@ subroutine ocn_tracer_surface_flux_tend(meshPool, fractionAbsorbed, fractionAbso if (associated(surfaceTracerFluxRunoff)) then call mpas_timer_start("surface_tracer_runoff_flux") + !$omp parallel !$omp do schedule(runtime) private(remainingFlux, k, iTracer) do iCell = 1, nCells remainingFlux = 1.0_RKIND @@ -188,6 +191,7 @@ subroutine ocn_tracer_surface_flux_tend(meshPool, fractionAbsorbed, fractionAbso end if end do !$omp end do + !$omp end parallel call mpas_timer_stop("surface_tracer_runoff_flux") end if diff --git a/src/core_ocean/shared/mpas_ocn_tracer_surface_restoring.F b/src/core_ocean/shared/mpas_ocn_tracer_surface_restoring.F index 8b3f7319ec..b93acfd7fd 100644 --- a/src/core_ocean/shared/mpas_ocn_tracer_surface_restoring.F +++ b/src/core_ocean/shared/mpas_ocn_tracer_surface_restoring.F @@ -136,6 +136,7 @@ subroutine ocn_tracer_surface_restoring_compute(groupName, nTracers, nCells, tra iLevel = 1 ! base surface flux restoring on tracer fields in the top layer + !$omp parallel !$omp do schedule(runtime) private(iTracer) do iCell=1,nCells do iTracer=1,nTracers @@ -155,6 +156,7 @@ subroutine ocn_tracer_surface_restoring_compute(groupName, nTracers, nCells, tra enddo enddo !$omp end do + !$omp end parallel !-------------------------------------------------------------------- diff --git a/src/core_ocean/shared/mpas_ocn_vel_forcing_explicit_bottom_drag.F b/src/core_ocean/shared/mpas_ocn_vel_forcing_explicit_bottom_drag.F index 9d45a68674..69dc8c18af 100644 --- a/src/core_ocean/shared/mpas_ocn_vel_forcing_explicit_bottom_drag.F +++ b/src/core_ocean/shared/mpas_ocn_vel_forcing_explicit_bottom_drag.F @@ -130,6 +130,7 @@ subroutine ocn_vel_forcing_explicit_bottom_drag_tend(meshPool, normalVelocity, & nEdges = nEdgesArray( 1 ) + !$omp parallel !$omp do schedule(runtime) private(k, cell1, cell2) do iEdge = 1, nEdges cell1 = cellsOnEdge(1,iEdge) @@ -148,6 +149,7 @@ subroutine ocn_vel_forcing_explicit_bottom_drag_tend(meshPool, normalVelocity, & enddo !$omp end do + !$omp end parallel call mpas_timer_stop('vel explicit bottom drag') diff --git a/src/core_ocean/shared/mpas_ocn_vel_forcing_surface_stress.F b/src/core_ocean/shared/mpas_ocn_vel_forcing_surface_stress.F index 7007c628ae..36f4a5b0a2 100644 --- a/src/core_ocean/shared/mpas_ocn_vel_forcing_surface_stress.F +++ b/src/core_ocean/shared/mpas_ocn_vel_forcing_surface_stress.F @@ -144,7 +144,10 @@ subroutine ocn_vel_forcing_surface_stress_tend(meshPool, surfaceFluxAttenuationC nEdges = nEdgesArray ( 1 ) - !$omp do schedule(runtime) + !$omp parallel + !$omp do schedule(runtime) & + !$omp private(iEdge, zTop, cell1, cell2, attenuationCoeff, transmissionCoeffTop, & + !$omp remainingStress, k, zBot) do iEdge = 1, nEdges zTop = 0.0_RKIND cell1 = cellsOnEdge(1,iEdge) @@ -174,6 +177,7 @@ subroutine ocn_vel_forcing_surface_stress_tend(meshPool, surfaceFluxAttenuationC end if enddo !$omp end do + !$omp end parallel call mpas_timer_stop('vel surface stress') diff --git a/src/core_ocean/shared/mpas_ocn_vel_hadv_coriolis.F b/src/core_ocean/shared/mpas_ocn_vel_hadv_coriolis.F index c4f0df8879..f7b04ef06e 100644 --- a/src/core_ocean/shared/mpas_ocn_vel_hadv_coriolis.F +++ b/src/core_ocean/shared/mpas_ocn_vel_hadv_coriolis.F @@ -149,7 +149,9 @@ subroutine ocn_vel_hadv_coriolis_tend(meshPool, normalizedRelativeVorticityEdge, allocate( qArr(nVertLevels) ) - !$omp do schedule(runtime) + !$omp parallel + !$omp do schedule(runtime) & + !$omp private(iEdge, cell1, cell2, invLength, k, qArr, j, eoe, edgeWeight, workVorticity) do iEdge = 1, nEdges cell1 = cellsOnEdge(1,iEdge) cell2 = cellsOnEdge(2,iEdge) @@ -180,6 +182,7 @@ subroutine ocn_vel_hadv_coriolis_tend(meshPool, normalizedRelativeVorticityEdge, end do !$omp end do + !$omp end parallel deallocate( qArr ) diff --git a/src/core_ocean/shared/mpas_ocn_vel_hmix_del2.F b/src/core_ocean/shared/mpas_ocn_vel_hmix_del2.F index 0b2002a493..b3148813b7 100644 --- a/src/core_ocean/shared/mpas_ocn_vel_hmix_del2.F +++ b/src/core_ocean/shared/mpas_ocn_vel_hmix_del2.F @@ -153,6 +153,7 @@ subroutine ocn_vel_hmix_del2_tend(meshPool, divergence, relativeVorticity, visco nEdges = nEdgesArray( 1 ) + !$omp parallel !$omp do schedule(runtime) private(cell1, cell2, vertex1, vertex2, invLength1, invLength2, k, u_diffusion, visc2) do iEdge = 1, nEdges cell1 = cellsOnEdge(1,iEdge) @@ -181,6 +182,7 @@ subroutine ocn_vel_hmix_del2_tend(meshPool, divergence, relativeVorticity, visco end do end do !$omp end do + !$omp end parallel call mpas_timer_stop("vel del2") diff --git a/src/core_ocean/shared/mpas_ocn_vel_hmix_del4.F b/src/core_ocean/shared/mpas_ocn_vel_hmix_del4.F index 3748f2a277..01f4e4400b 100644 --- a/src/core_ocean/shared/mpas_ocn_vel_hmix_del4.F +++ b/src/core_ocean/shared/mpas_ocn_vel_hmix_del4.F @@ -189,7 +189,9 @@ subroutine ocn_vel_hmix_del4_tend(meshPool, divergence, relativeVorticity, tend, nEdges = nEdgesArray( 3 ) !Compute delsq_u - !$omp do schedule(runtime) + !$omp parallel + !$omp do schedule(runtime) & + !$omp private(iEdge, cell1, cell2, vertex1, vertex2, invDcEdge, invDvEdge, k) do iEdge = 1, nEdges delsq_u(:, iEdge) = 0.0_RKIND cell1 = cellsOnEdge(1,iEdge) @@ -208,11 +210,13 @@ subroutine ocn_vel_hmix_del4_tend(meshPool, divergence, relativeVorticity, tend, end do end do !$omp end do + !$omp end parallel nVertices = nVerticesArray( 2 ) ! Compute delsq_relativeVorticity - !$omp do schedule(runtime) + !$omp parallel + !$omp do schedule(runtime) private(iVertex, invAreaTri1, i, iEdge, k) do iVertex = 1, nVertices delsq_relativeVorticity(:, iVertex) = 0.0_RKIND invAreaTri1 = 1.0_RKIND / areaTriangle(iVertex) @@ -225,11 +229,13 @@ subroutine ocn_vel_hmix_del4_tend(meshPool, divergence, relativeVorticity, tend, end do end do !$omp end do + !$omp end parallel nCells = nCellsArray( 2 ) ! Compute delsq_divergence - !$omp do schedule(runtime) + !$omp parallel + !$omp do schedule(runtime) private(iCell, invAreaCell1, i, iEdge, k) do iCell = 1, nCells delsq_divergence(:, iCell) = 0.0_RKIND invAreaCell1 = 1.0_RKIND / areaCell(iCell) @@ -242,12 +248,15 @@ subroutine ocn_vel_hmix_del4_tend(meshPool, divergence, relativeVorticity, tend, end do end do !$omp end do + !$omp end parallel nEdges = nEdgesArray( 1 ) ! Compute - \kappa \nabla^4 u ! as \nabla div(\nabla^2 u) + k \times \nabla ( k \cross curl(\nabla^2 u) ) - !$omp do schedule(runtime) + !$omp parallel + !$omp do schedule(runtime) & + !$omp private(iEdge, cell1, cell2, vertex1, vertex2, invDcEdge, invDvEdge, r_tmp, k, u_diffusion) do iEdge = 1, nEdges cell1 = cellsOnEdge(1,iEdge) cell2 = cellsOnEdge(2,iEdge) @@ -266,6 +275,7 @@ subroutine ocn_vel_hmix_del4_tend(meshPool, divergence, relativeVorticity, tend, end do end do !$omp end do + !$omp end parallel !$omp barrier !$omp master diff --git a/src/core_ocean/shared/mpas_ocn_vel_hmix_leith.F b/src/core_ocean/shared/mpas_ocn_vel_hmix_leith.F index 7897f7e68f..04e614446b 100644 --- a/src/core_ocean/shared/mpas_ocn_vel_hmix_leith.F +++ b/src/core_ocean/shared/mpas_ocn_vel_hmix_leith.F @@ -158,6 +158,7 @@ subroutine ocn_vel_hmix_leith_tend(meshPool, divergence, relativeVorticity, visc nEdges = nEdgesArray( 1 ) + !$omp parallel !$omp do schedule(runtime) private(cell1, cell2, vertex1, vertex2, invLength_dc, invLength_dv, k, u_diffusion, visc2) do iEdge = 1, nEdges cell1 = cellsOnEdge(1,iEdge) @@ -191,6 +192,7 @@ subroutine ocn_vel_hmix_leith_tend(meshPool, divergence, relativeVorticity, visc end do end do !$omp end do + !$omp end parallel call mpas_timer_stop("vel leith") diff --git a/src/core_ocean/shared/mpas_ocn_vel_pressure_grad.F b/src/core_ocean/shared/mpas_ocn_vel_pressure_grad.F index ad6c8a809e..759686a91d 100644 --- a/src/core_ocean/shared/mpas_ocn_vel_pressure_grad.F +++ b/src/core_ocean/shared/mpas_ocn_vel_pressure_grad.F @@ -163,6 +163,7 @@ subroutine ocn_vel_pressure_grad_tend(meshPool, ssh, pressure, montgomeryPotenti ! pressure for sea surface height ! - g grad ssh + !$omp parallel !$omp do schedule(runtime) private(cell1, cell2, invdcEdge, k) do iEdge=1,nEdges cell1 = cellsOnEdge(1,iEdge) @@ -174,12 +175,14 @@ subroutine ocn_vel_pressure_grad_tend(meshPool, ssh, pressure, montgomeryPotenti end do end do !$omp end do + !$omp end parallel elseif (config_pressure_gradient_type.eq.'pressure_and_zmid') then ! pressure for generalized coordinates ! -1/density_0 (grad p_k + density g grad z_k^{mid}) + !$omp parallel !$omp do schedule(runtime) private(cell1, cell2, invdcEdge, k) do iEdge=1,nEdges cell1 = cellsOnEdge(1,iEdge) @@ -193,12 +196,14 @@ subroutine ocn_vel_pressure_grad_tend(meshPool, ssh, pressure, montgomeryPotenti end do end do !$omp end do + !$omp end parallel elseif (config_pressure_gradient_type.eq.'MontgomeryPotential') then ! For pure isopycnal coordinates, this is just grad(M), ! the gradient of Montgomery Potential + !$omp parallel !$omp do schedule(runtime) private(cell1, cell2, invdcEdge, k) do iEdge=1,nEdges cell1 = cellsOnEdge(1,iEdge) @@ -211,6 +216,7 @@ subroutine ocn_vel_pressure_grad_tend(meshPool, ssh, pressure, montgomeryPotenti end do end do !$omp end do + !$omp end parallel elseif (config_pressure_gradient_type.eq.'MontgomeryPotential_and_density') then @@ -220,6 +226,7 @@ subroutine ocn_vel_pressure_grad_tend(meshPool, ssh, pressure, montgomeryPotenti ! Where rho is the potential density. ! See Bleck (2002) equation 1, and last equation in Appendix A. + !$omp parallel !$omp do schedule(runtime) private(cell1, cell2, invdcEdge, k) do iEdge=1,nEdges cell1 = cellsOnEdge(1,iEdge) @@ -234,12 +241,14 @@ subroutine ocn_vel_pressure_grad_tend(meshPool, ssh, pressure, montgomeryPotenti end do end do !$omp end do + !$omp end parallel elseif (config_pressure_gradient_type.eq.'Jacobian_from_density') then allocate(JacobianDxDs(nVertLevels)) - !$omp do schedule(runtime) + !$omp parallel + !$omp do schedule(runtime) private(iEdge, cell1, cell2, invdcEdge, k, pGrad) do iEdge=1,nEdges cell1 = cellsOnEdge(1,iEdge) cell2 = cellsOnEdge(2,iEdge) @@ -269,6 +278,7 @@ subroutine ocn_vel_pressure_grad_tend(meshPool, ssh, pressure, montgomeryPotenti end do end do !$omp end do + !$omp end parallel deallocate(JacobianDxDs) @@ -277,7 +287,10 @@ subroutine ocn_vel_pressure_grad_tend(meshPool, ssh, pressure, montgomeryPotenti allocate(JacobianDxDs(nVertLevels),JacobianTz(nVertLevels),JacobianSz(nVertLevels), T1(nVertLevels)) allocate(T2(nVertLevels), S1(nVertLevels), S2(nVertLevels)) - !$omp do schedule(runtime) + !$omp parallel + !$omp do schedule(runtime) & + !$omp private(iEdge, cell1, cell2, invdcEdge, kMax, T1, T2, S1, S2, k, alpha, beta, & + !$omp pGrad, JacobianDxDs, JacobianTz, JacobianSz) do iEdge=1,nEdges cell1 = cellsOnEdge(1,iEdge) cell2 = cellsOnEdge(2,iEdge) @@ -330,6 +343,7 @@ subroutine ocn_vel_pressure_grad_tend(meshPool, ssh, pressure, montgomeryPotenti end do end do !$omp end do + !$omp end parallel deallocate(JacobianDxDs,JacobianTz,JacobianSz, T1, T2, S1, S2) diff --git a/src/core_ocean/shared/mpas_ocn_vel_vadv.F b/src/core_ocean/shared/mpas_ocn_vel_vadv.F index 607ea71a2a..3a49f0fc5f 100644 --- a/src/core_ocean/shared/mpas_ocn_vel_vadv.F +++ b/src/core_ocean/shared/mpas_ocn_vel_vadv.F @@ -138,7 +138,10 @@ subroutine ocn_vel_vadv_tend(meshPool, normalVelocity, layerThicknessEdge, vertA nEdges = nEdgesArray( 1 ) - !$omp do schedule(runtime) + !$omp parallel + !$omp do schedule(runtime) & + !$omp private(iEdge, cell1, cell2, k, vertAleTransportTopEdge) & + !$omp firstprivate(w_dudzTopEdge) do iEdge = 1, nEdges cell1 = cellsOnEdge(1,iEdge) cell2 = cellsOnEdge(2,iEdge) @@ -159,6 +162,7 @@ subroutine ocn_vel_vadv_tend(meshPool, normalVelocity, layerThicknessEdge, vertA enddo enddo !$omp end do + !$omp end parallel deallocate(w_dudzTopEdge) diff --git a/src/core_ocean/shared/mpas_ocn_vmix.F b/src/core_ocean/shared/mpas_ocn_vmix.F index 463b164b93..2bc55efa19 100644 --- a/src/core_ocean/shared/mpas_ocn_vmix.F +++ b/src/core_ocean/shared/mpas_ocn_vmix.F @@ -159,19 +159,23 @@ subroutine ocn_vmix_coefs(meshPool, statePool, forcingPool, diagnosticsPool, scr nEdges = nEdgesArray( 2 ) + !$omp parallel !$omp do schedule(runtime) do iEdge = 1, nEdges vertViscTopOfEdge(:, iEdge) = 0.0_RKIND end do !$omp end do + !$omp end parallel nCells = nCellsArray( 2 ) + !$omp parallel !$omp do schedule(runtime) do iCell = 1, nCells vertDiffTopOfCell(:, iCell) = 0.0_RKIND end do !$omp end do + !$omp end parallel call ocn_vmix_coefs_cvmix_build(meshPool, statePool, forcingPool, diagnosticsPool, err1, timeLevel) call ocn_vmix_coefs_redi_build(meshPool, statePool, diagnosticsPool, err2, timeLevel) @@ -271,7 +275,9 @@ subroutine ocn_vel_vmix_tend_implicit_rayleigh(meshPool, dt, kineticEnergyCell, allocate(A(nVertLevels),B(nVertLevels),C(nVertLevels),velTemp(nVertLevels)) A(1)=0.0_RKIND - !$omp do schedule(runtime) + !$omp parallel + !$omp do schedule(runtime) & + !$omp private(iEdge, N, cell1, cell2, edgeThicknessTotal, k, A, B, C, velTemp) do iEdge = 1, nEdges N = maxLevelEdgeTop(iEdge) if (N .gt. 0) then @@ -327,6 +333,7 @@ subroutine ocn_vel_vmix_tend_implicit_rayleigh(meshPool, dt, kineticEnergyCell, end if end do !$omp end do + !$omp end parallel deallocate(A,B,C,velTemp) @@ -422,7 +429,8 @@ subroutine ocn_vel_vmix_tend_implicit(meshPool, dt, kineticEnergyCell, vertViscT allocate(A(nVertLevels),B(nVertLevels),C(nVertLevels),velTemp(nVertLevels)) A(1)=0.0_RKIND - !$omp do schedule(runtime) + !$omp parallel + !$omp do schedule(runtime) private(iEdge, N, cell1, cell2, A, B, C, velTemp) do iEdge = 1, nEdges N = maxLevelEdgeTop(iEdge) if (N .gt. 0) then @@ -469,6 +477,7 @@ subroutine ocn_vel_vmix_tend_implicit(meshPool, dt, kineticEnergyCell, vertViscT end if end do !$omp end do + !$omp end parallel deallocate(A,B,C,velTemp) @@ -564,7 +573,8 @@ subroutine ocn_tracer_vmix_tend_implicit(meshPool, dt, vertDiffTopOfCell, layerT nCells = nCellsArray( 1 ) call mpas_timer_start('vmix tracers tend imp loop', .false.) - !$omp do schedule(runtime) + !$omp parallel + !$omp do schedule(runtime) private(iCell, N, A, B, C, rhs, tracersTemp) do iCell = 1, nCells ! Compute A(k), B(k), C(k) for tracers N = maxLevelCell(iCell) @@ -603,6 +613,7 @@ subroutine ocn_tracer_vmix_tend_implicit(meshPool, dt, vertDiffTopOfCell, layerT tracers(:,N+1:nVertLevels,iCell) = -1e34 end do !$omp end do + !$omp end parallel call mpas_timer_stop('vmix tracers tend imp loop') deallocate(A, B, C, tracersTemp, rhs) @@ -695,6 +706,7 @@ subroutine ocn_vmix_implicit(dt, meshPool, diagnosticsPool, statePool, forcingPo nEdges = nEdgesArray( 1 ) call mpas_timer_start('CVMix avg', .false.) + !$omp parallel !$omp do schedule(runtime) private(cell1, cell2, k) do iEdge=1,nEdges vertViscTopOfEdge(:, iEdge) = 0.0_RKIND @@ -705,6 +717,7 @@ subroutine ocn_vmix_implicit(dt, meshPool, diagnosticsPool, statePool, forcingPo end do end do !$omp end do + !$omp end parallel call mpas_timer_stop('CVMix avg') endif @@ -736,11 +749,13 @@ subroutine ocn_vmix_implicit(dt, meshPool, diagnosticsPool, statePool, forcingPo ! store tracers if (trim(groupItr % memberName) == 'activeTracers') then if (config_compute_active_tracer_budgets) then + !$omp parallel !$omp do schedule(runtime) do iCell = 1, nCells activeTracerVertMixTendency(:,:,iCell)=tracersGroup(:,:,iCell) end do !$omp end do + !$omp end parallel endif endif @@ -762,12 +777,14 @@ subroutine ocn_vmix_implicit(dt, meshPool, diagnosticsPool, statePool, forcingPo ! difference tracers to compute influence of vertical mixing and divide by dt if (trim(groupItr % memberName) == 'activeTracers') then if (config_compute_active_tracer_budgets) then + !$omp parallel !$omp do schedule(runtime) do iCell = 1, nCells activeTracerVertMixTendency(:,:,iCell) = & (tracersGroup(:,:,iCell) - activeTracerVertMixTendency(:,:,iCell)) / dt end do !$omp end do + !$omp end parallel endif endif diff --git a/src/core_ocean/shared/mpas_ocn_vmix_coefs_redi.F b/src/core_ocean/shared/mpas_ocn_vmix_coefs_redi.F index d1e72d5199..36ebc80ced 100644 --- a/src/core_ocean/shared/mpas_ocn_vmix_coefs_redi.F +++ b/src/core_ocean/shared/mpas_ocn_vmix_coefs_redi.F @@ -209,6 +209,7 @@ subroutine ocn_tracer_vmix_coefs_redi(meshPool, vertDiffTopOfCell, k33, RediKapp nCells = nCellsArray(1) + !$omp parallel !$omp do schedule(runtime) private(i, rediKappaCell, iEdge) do iCell = 1, nCells rediKappaCell = 0.0_RKIND @@ -221,6 +222,7 @@ subroutine ocn_tracer_vmix_coefs_redi(meshPool, vertDiffTopOfCell, k33, RediKapp / areaCell(iCell) end do !$omp end do + !$omp end parallel call mpas_timer_stop('tracer redi coef') diff --git a/src/core_ocean/shared/mpas_ocn_vmix_cvmix.F b/src/core_ocean/shared/mpas_ocn_vmix_cvmix.F index c0d1373a99..0c6777e112 100644 --- a/src/core_ocean/shared/mpas_ocn_vmix_cvmix.F +++ b/src/core_ocean/shared/mpas_ocn_vmix_cvmix.F @@ -268,6 +268,7 @@ subroutine ocn_vmix_coefs_cvmix_build(meshPool, statePool, forcingPool, diagnost nCells = nCellsArray( size(nCellsArray) ) + !$omp parallel !$omp do schedule(runtime) do iCell = 1, nCells vertViscTopOfCell(:, iCell) = 0.0_RKIND @@ -275,6 +276,7 @@ subroutine ocn_vmix_coefs_cvmix_build(meshPool, statePool, forcingPool, diagnost vertNonLocalFlux(:, :, iCell) = 0.0_RKIND end do !$omp end do + !$omp end parallel nCells = nCellsArray( 3 ) @@ -282,6 +284,7 @@ subroutine ocn_vmix_coefs_cvmix_build(meshPool, statePool, forcingPool, diagnost ! start by adding the mininum background values to the viscocity/diffusivity arrays ! if (cvmixBackgroundOn) then + !$omp parallel !$omp do schedule(runtime) do iCell = 1, nCells do k = 1, nVertLevelsP1 @@ -290,6 +293,7 @@ subroutine ocn_vmix_coefs_cvmix_build(meshPool, statePool, forcingPool, diagnost end do end do !$omp end do + !$omp end parallel endif ! @@ -335,7 +339,16 @@ subroutine ocn_vmix_coefs_cvmix_build(meshPool, statePool, forcingPool, diagnost call mpas_timer_start('cvmix cell loop', .false.) do kpp_stage = 1,2 - !$omp do schedule(runtime) + ! TODO, mdt: determine where the openmp data race is coming from in this loop + !!$omp parallel + !!$omp do schedule(runtime) & + !!$omp private(iCell, invAreaCell, k, RiSmoothed, RiTemp, BVFSmoothed, nsmooth, & + !!$omp langmuirNumber, langmuirEnhancementFactor, bulkRichardsonNumberStop, & + !!$omp bulkRichardsonFlag, topIndex, kIndexOBL, deltaVelocitySquared, sigma, & + !!$omp OBLDepths, interfaceForcings, surfaceAverageIndex, sfc_layer_depth, kav, & + !!$omp i, iEdge, factor, normalVelocitySum, NormalVelocityAv, & + !!$omp delU2, potentialDensitySum, blTemp) & + !!$omp firstprivate(cvmix_variables, Nsqr_iface, turbulentScalarVelocityScale) do iCell = 1, nCells ! specify geometry/location @@ -732,11 +745,13 @@ subroutine ocn_vmix_coefs_cvmix_build(meshPool, statePool, forcingPool, diagnost endif ! kpp stage 2 convection calc end do ! do iCell=1,mesh%nCells - !$omp end do + !!$omp end do + !!$omp end parallel if (kpp_stage == 1 .and. config_cvmix_use_BLD_smoothing) then ! smooth boundary layer nCells = nCellsArray(2) + !$omp parallel !$omp do schedule(runtime) private(nEdges, areaSum, edgeCount, iEdge, iNeighbor) do iCell=1,nCells nEdges = nEdgesOnCell(iCell) @@ -764,6 +779,7 @@ subroutine ocn_vmix_coefs_cvmix_build(meshPool, statePool, forcingPool, diagnost boundaryLayerDepth(iCell) = boundaryLayerDepthSmooth(iCell) enddo !$omp end do + !$omp end parallel endif !stage 1 BLD smoothing end do ! kpp_stage diff --git a/src/core_ocean/shared/mpas_ocn_wetting_drying.F b/src/core_ocean/shared/mpas_ocn_wetting_drying.F index a129304eb4..c0df1d0c2e 100644 --- a/src/core_ocean/shared/mpas_ocn_wetting_drying.F +++ b/src/core_ocean/shared/mpas_ocn_wetting_drying.F @@ -271,13 +271,13 @@ subroutine ocn_prevent_drying_rk4(block, dt, rkSubstepWeight, config_zero_drying call mpas_pool_get_array(meshPool, 'maxLevelEdgeTop', maxLevelEdgeTop) call mpas_pool_get_array(meshPool, 'maxLevelEdgeBot', maxLevelEdgeBot) - + !$omp parallel !$omp do schedule(runtime) do iEdge = 1, nEdges wettingVelocity(:, iEdge) = 0.0_RKIND end do !$omp end do - call mpas_threading_barrier() + !$omp end parallel ! ensure cells stay wet by selectively damping cells with a damping tendency to make sure tendency doesn't dry cells @@ -285,7 +285,8 @@ subroutine ocn_prevent_drying_rk4(block, dt, rkSubstepWeight, config_zero_drying normalTransportVelocity, rkSubstepWeight, wettingVelocity, err) ! prevent drying from happening with selective wettingVelocity - !$omp do schedule(runtime) + !$omp parallel + !$omp do schedule(runtime) private(iEdge, k) do iEdge = 1, nEdges do k = 1, maxLevelEdgeBot(iEdge) if (abs(normalTransportVelocity(k,iEdge) + wettingVelocity(k,iEdge)) < eps) then @@ -305,7 +306,7 @@ subroutine ocn_prevent_drying_rk4(block, dt, rkSubstepWeight, config_zero_drying end do end do !$omp end do - call mpas_threading_barrier() + !$omp end parallel end subroutine ocn_prevent_drying_rk4 !}}} @@ -402,6 +403,7 @@ subroutine ocn_wetting_drying_wettingVelocity(meshPool, layerThicknessEdge, laye call mpas_threading_barrier() ! need predicted transport velocity to limit drying flux + !$omp parallel !$omp do schedule(runtime) private(invAreaCell, i, iEdge, k) do iCell = 1, nCells invAreaCell = 1.0_RKIND / areaCell(iCell) @@ -445,8 +447,7 @@ subroutine ocn_wetting_drying_wettingVelocity(meshPool, layerThicknessEdge, laye end do end do !$omp end do - - call mpas_threading_barrier() + !$omp end parallel end subroutine ocn_wetting_drying_wettingVelocity !}}} From 9a3de98de478797812b721339347f3a70a9b0602 Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Fri, 22 May 2020 14:25:29 -0700 Subject: [PATCH 03/16] Localized threaing in tracer_hmix_redi --- .../shared/mpas_ocn_tracer_hmix_redi.F | 34 ++++++++++++------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/src/core_ocean/shared/mpas_ocn_tracer_hmix_redi.F b/src/core_ocean/shared/mpas_ocn_tracer_hmix_redi.F index 26cff66aac..1c97572c8a 100644 --- a/src/core_ocean/shared/mpas_ocn_tracer_hmix_redi.F +++ b/src/core_ocean/shared/mpas_ocn_tracer_hmix_redi.F @@ -169,15 +169,12 @@ subroutine ocn_tracer_hmix_Redi_tend(meshPool, layerThickness, layerThicknessEdg allocate (minimumVal(nTracers)) allocate (fluxRediZTop(nTracers, nVertLevels + 1)) - !$omp master allocate (rediKappaCell(nCells)) allocate (redi_term1(nTracers, nVertLevels, nCells)) allocate (redi_term2(nTracers, nVertLevels, nCells)) allocate (redi_term3(nTracers, nVertLevels, nCells)) allocate (redi_term2_edge(nTracers, nVertLevels, nEdges)) allocate (redi_term3_topOfCell(nTracers, nVertLevels + 1, nCells)) - !$omp end master - !$omp barrier nCells = nCellsArray(2) !$omp do schedule(runtime) private(i, iEdge) @@ -196,8 +193,11 @@ subroutine ocn_tracer_hmix_Redi_tend(meshPool, layerThickness, layerThicknessEdg ! Term 1: this is the "standard" horizontal del2 term, but with RediKappa coefficient. ! \kappa_2 \nabla \phi on edge - !$omp do schedule(runtime) private(invAreaCell, i, k, iTr, iEdge, cell1, cell2, iCellSelf, & - !$omp& r_tmp, coef, kappaRediEdgeVal, tracer_turb_flux, flux, flux_term2, flux_term3, dTracerDx) + !$omp parallel + !$omp do schedule(runtime) & + !$omp private(fluxRediZTop, invAreaCell, i, iEdge, cell1, cell2, iCellSelf, r_tmp, coef, k, & + !$omp kappaRediEdgeVal, iTr, tracer_turb_flux, flux, flux_term2, flux_term3, & + !$omp dTracerDx) do iCell = 1, nCells redi_term1(:, :, iCell) = 0.0_RKIND redi_term2(:, :, iCell) = 0.0_RKIND @@ -344,22 +344,26 @@ subroutine ocn_tracer_hmix_Redi_tend(meshPool, layerThickness, layerThicknessEdg end do end do ! iCell !$omp end do - !$omp barrier + !$omp end parallel !loop over cells and check for out of bounds if (isActiveTracer) then minimumVal(1) = -2.0_RKIND minimumVal(2:nTracers) = 0.0_RKIND + !$omp parallel !$omp do schedule(runtime) do iCell = 1, nCells rediLimiterCount(:, iCell) = 0 end do !$omp end do + !$omp end parallel else minimumVal(:) = 0.0_RKIND endif - !$omp do schedule(runtime) private(k, iTr, tempTracer, iEdge) + !$omp parallel + !$omp do schedule(runtime) & + !$omp private(k, iTr, tempTracer, iEdge) do iCell = 1, nCells do k = 1, maxLevelCell(iCell) do iTr = 1, ntracers @@ -383,10 +387,12 @@ subroutine ocn_tracer_hmix_Redi_tend(meshPool, layerThickness, layerThicknessEdg end do end do ! iCell !$omp end do - !$omp barrier + !$omp end parallel !now go back and reapply all tendencies - !$omp do schedule(runtime) private(invAreaCell, k, iTr, i, iEdge) + !$omp parallel + !$omp do schedule(runtime) & + !$omp private(invAreaCell, k, iTr, i, iEdge) do iCell = 1, nCells invAreaCell = 1.0_RKIND/areaCell(iCell) do k = 1, maxLevelCell(iCell) @@ -410,18 +416,16 @@ subroutine ocn_tracer_hmix_Redi_tend(meshPool, layerThickness, layerThicknessEdg end do end do ! iCell !$omp end do + !$omp end parallel deallocate (fluxRediZTop) deallocate (minimumVal) - !$omp master deallocate (rediKappaCell) deallocate (redi_term1) deallocate (redi_term2) deallocate (redi_term3) deallocate (redi_term2_edge) deallocate (redi_term3_topOfCell) - !$omp end master - !$omp barrier call mpas_timer_stop("tracer redi") @@ -481,12 +485,14 @@ subroutine ocn_tracer_hmix_Redi_init(domain, err)!{{{ call mpas_pool_get_array(meshPool, 'dcEdge', dcEdge) ! initialize Redi kappa array if (config_Redi_closure == 'constant') then + !$omp parallel !$omp do schedule(runtime) do iEdge = 1, nEdges RediKappa(iEdge) = config_Redi_kappa end do !$omp end do - else if (config_Redi_closure == 'data') then + !$omp end parallel + else if (config_Redi_closure == 'data') then ! read RediKappa in from input call mpas_log_write("config_Redi_closure = 'data'. "// & "Make sure that the variable RediKappa is read in from an input file.") @@ -500,6 +506,7 @@ subroutine ocn_tracer_hmix_Redi_init(domain, err)!{{{ if (config_eddying_resolution_taper == 'none') then ! Nothing to do, as we just keep the same assignment as above. else if (config_eddying_resolution_taper == 'ramp') then + !$omp parallel !$omp do schedule(runtime) do iEdge=1,nEdges if (dcEdge(iEdge) <= config_eddying_resolution_ramp_min) then @@ -513,6 +520,7 @@ subroutine ocn_tracer_hmix_Redi_init(domain, err)!{{{ end if end do ! iCell !$omp end do + !$omp end parallel else call mpas_log_write('Invalid choice of config_eddying_resolution_taper.', MPAS_LOG_CRIT) err = 1 From 777afcbac1f9b701061d8ccde49d6a10d49d7320 Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Fri, 22 May 2020 17:46:16 -0700 Subject: [PATCH 04/16] Localized threading in mpas_ocn_gm --- src/core_ocean/shared/mpas_ocn_gm.F | 72 ++++++++++++++++++++--------- 1 file changed, 50 insertions(+), 22 deletions(-) diff --git a/src/core_ocean/shared/mpas_ocn_gm.F b/src/core_ocean/shared/mpas_ocn_gm.F index cd31e9aa6c..9d9b2283ea 100644 --- a/src/core_ocean/shared/mpas_ocn_gm.F +++ b/src/core_ocean/shared/mpas_ocn_gm.F @@ -226,11 +226,7 @@ subroutine ocn_GM_compute_Bolus_velocity(statePool, diagnosticsPool, & !$omp end master !$omp barrier - allocate (rightHandSide(nVertLevels)) - allocate (tridiagA(nVertLevels)) - allocate (tridiagB(nVertLevels)) - allocate (tridiagC(nVertLevels)) - + !$omp parallel !$omp do schedule(runtime) private(k) do iEdge = 1, nEdges do k = 1, nVertLevels @@ -258,11 +254,6 @@ subroutine ocn_GM_compute_Bolus_velocity(statePool, diagnosticsPool, & end do end do !$omp end do - allocate (dzTop(nVertLevels + 1)) - allocate (dTdzTop(nVertLevels + 1)) - allocate (dSdzTop(nVertLevels + 1)) - allocate (k33Norm(nVertLevels + 1)) - !$omp do schedule(runtime) do iCell = 1, nCells RediKappaScaling(:, iCell) = 1.0_RKIND @@ -270,6 +261,12 @@ subroutine ocn_GM_compute_Bolus_velocity(statePool, diagnosticsPool, & RediKappaSfcTaper(:, iCell) = 1.0_RKIND end do !$omp end do + !$omp end parallel + + allocate (dzTop(nVertLevels + 1)) + allocate (dTdzTop(nVertLevels + 1)) + allocate (dSdzTop(nVertLevels + 1)) + allocate (k33Norm(nVertLevels + 1)) ! The following code computes scaling for Redi mixing terms and the slope triads ! It is only needed when redi mixing is enabled. @@ -277,7 +274,9 @@ subroutine ocn_GM_compute_Bolus_velocity(statePool, diagnosticsPool, & if ( config_Redi_use_N2_based_taper .and. .not. local_config_GM_kappa_lat_depth_variable ) then - !$omp do schedule(runtime) private(maxLocation, cell1, cell2, k, BruntVaisalaFreqTopEdge, maxN) + !$omp parallel + !$omp do schedule(runtime) & + !$omp private(k, maxN, maxLocation, BruntVaisalaFreqTopEdge) do iCell = 1, nCells k = min(maxLevelCell(iCell) - 1, max(1, indMLD(iCell))) maxN = max(BruntVaisalaFreqTop(k, iCell), 0.0_RKIND) @@ -296,9 +295,11 @@ subroutine ocn_GM_compute_Bolus_velocity(statePool, diagnosticsPool, & end do end do !$omp end do + !$omp end parallel end if if (config_Redi_use_surface_taper) then + !$omp parallel !$omp do schedule (runtime) private(zMLD, k) do iCell = 1, nCells k = max(1, indMLD(iCell)) @@ -309,12 +310,15 @@ subroutine ocn_GM_compute_Bolus_velocity(statePool, diagnosticsPool, & end do end do !$omp end do + !$omp end parallel end if nCells = nCellsArray(3) - !$omp do schedule(runtime) private(i, k, iEdge, cell1, cell2, iCellSelf, & - !$omp& invAreaCell, dcEdgeInv, areaEdge, drhoDT, drhoDS, dTdx, dSdx, drhoDx, & - !$omp& sfcTaper, sfcTaperUp, sfcTaperDown, slopeTaperDown, slopeTaperUp) + !$omp parallel + !$omp do schedule(runtime) & + !$omp private(invAreaCell, k33Norm, dzTop, dTdzTop, dSdzTop, i, iEdge, cell1, cell2, & + !$omp iCellSelf, dcEdgeInv, areaEdge, drhoDT, drhoDS, dTdx, dSdx, drhoDx, & + !$omp slopeTaperUp, slopeTaperDown, sfcTaperUp, sfcTaperDown, sfcTaper) do iCell = 1, nCells invAreaCell = 1.0_RKIND/areaCell(iCell) k33(1:maxLevelCell(iCell) + 1, iCell) = 0.0_RKIND @@ -427,6 +431,7 @@ subroutine ocn_GM_compute_Bolus_velocity(statePool, diagnosticsPool, & k33(maxLevelCell(iCell) + 1, iCell) = 0.0_RKIND end do ! iCell !$omp end do + !$omp end parallel endif !config_use_redi @@ -438,11 +443,13 @@ subroutine ocn_GM_compute_Bolus_velocity(statePool, diagnosticsPool, & ! allow disabling of K33 for testing if (config_disable_redi_k33) then nCells = nCellsArray(size(nCellsArray)) + !$omp parallel !$omp do schedule(runtime) do iCell = 1, nCells k33(:, iCell) = 0.0_RKIND end do !$omp end do + !$omp end parallel end if !-------------------------------------------------------------------- @@ -453,6 +460,7 @@ subroutine ocn_GM_compute_Bolus_velocity(statePool, diagnosticsPool, & if (config_use_GM) then nEdges = nEdgesArray(3) + !$omp parallel !$omp do schedule(runtime) private(k, cell1, cell2, dcEdgeInv, & !$omp& drhoDT, drhoDS, dTdx, dSdx, drhoDx) do iEdge = 1, nEdges @@ -515,6 +523,7 @@ subroutine ocn_GM_compute_Bolus_velocity(statePool, diagnosticsPool, & end do end do !$omp end do + !$omp end parallel ! For config_GM_closure = 'N2_dependent' use a scaling to taper gmBolusKappa ! based on stratification relative to the maximum in the column @@ -544,6 +553,7 @@ subroutine ocn_GM_compute_Bolus_velocity(statePool, diagnosticsPool, & ! For config_GM_closure = 'N2_dependent' or 'visbeck' compute a spatially variable ! baroclinic phase speed, the mode can be specified by config_GM_baroclinic_mode if (local_config_GM_lat_variable_c2) then + !$omp parallel !$omp do schedule(runtime) private(k, cell1, cell2, sumN2, ltSum, countN2, BruntVaisalaFreqTopEdge) do iEdge = 1, nEdges cell1 = cellsOnEdge(1, iEdge) @@ -573,13 +583,16 @@ subroutine ocn_GM_compute_Bolus_velocity(statePool, diagnosticsPool, & end do !$omp end do + !$omp end parallel else !constant phase speed for config_GM_closure = 'constant' + !$omp parallel !$omp do schedule(runtime) do iEdge = 1, nEdges cGMphaseSpeed(iEdge) = config_GM_constant_gravWaveSpeed end do !$omp end do + !$omp end parallel end if ! When config_GM_closure = 'visbeck' actually modify the value of gmBolusKappa based on @@ -645,8 +658,16 @@ subroutine ocn_GM_compute_Bolus_velocity(statePool, diagnosticsPool, & endif nEdges = nEdgesArray(3) - !$omp do schedule(runtime) private(cell1, cell2, k, gradDensityTopOfEdge, & - !$omp& kappaGMEdge, BruntVaisalaFreqTopEdge, N) + !$omp parallel + + allocate (rightHandSide(nVertLevels)) + allocate (tridiagA(nVertLevels)) + allocate (tridiagB(nVertLevels)) + allocate (tridiagC(nVertLevels)) + + !$omp do schedule(runtime) & + !$omp private(cell1, cell2, k, gradDensityTopOfEdge, kappaGMEdge, BruntVaisalaFreqTopEdge) & + !$omp firstprivate(tridiagA, tridiagB, tridiagC, rightHandSide, N) do iEdge = 1, nEdges cell1 = cellsOnEdge(1, iEdge) cell2 = cellsOnEdge(2, iEdge) @@ -731,9 +752,11 @@ subroutine ocn_GM_compute_Bolus_velocity(statePool, diagnosticsPool, & end if end do !$omp end do + !$omp end parallel nEdges = nEdgesArray(3) ! Compute normalGMBolusVelocity from the stream function and apply resolution taper of GM here + !$omp parallel !$omp do schedule(runtime) private(k) do iEdge = 1, nEdges do k = 1, maxLevelEdgeTop(iEdge) @@ -742,10 +765,12 @@ subroutine ocn_GM_compute_Bolus_velocity(statePool, diagnosticsPool, & end do end do !$omp end do + !$omp end parallel nCells = nCellsArray(1) ! Interpolate gmStreamFuncTopOfEdge to cell centers for visualization + !$omp parallel !$omp do schedule(runtime) private(i, iEdge, areaEdge, k, rtmp) do iCell = 1, nCells gmStreamFuncTopOfCell(:, iCell) = 0.0_RKIND @@ -767,11 +792,7 @@ subroutine ocn_GM_compute_Bolus_velocity(statePool, diagnosticsPool, & gmStreamFuncTopOfCell(:, iCell) = gmStreamFuncTopOfCell(:, iCell)/areaCell(iCell) end do !$omp end do - - deallocate (rightHandSide) - deallocate (tridiagA) - deallocate (tridiagB) - deallocate (tridiagC) + !$omp end parallel end if !end config_use_GM @@ -919,11 +940,13 @@ subroutine ocn_GM_init(domain, err)!{{{ local_config_GM_lat_variable_c2 = .false. local_config_GM_kappa_lat_depth_variable = .false. local_config_GM_compute_visbeck = .false. + !$omp parallel !$omp do schedule(runtime) do iEdge = 1, nEdges gmBolusKappa(iEdge) = config_GM_kappa end do !$omp end do + !$omp end parallel RediGMinitValue = 1.0_RKIND else if (config_GM_closure == 'N2_dependent') then local_config_GM_lat_variable_c2 = .true. @@ -931,11 +954,13 @@ subroutine ocn_GM_init(domain, err)!{{{ local_config_GM_compute_visbeck = .false. RediGMinitValue = 0.0_RKIND ! for N2 dependence, we still assign Kappa as a constant. + !$omp parallel !$omp do schedule(runtime) do iEdge = 1, nEdges gmBolusKappa(iEdge) = config_GM_kappa end do !$omp end do + !$omp end parallel else if (config_GM_closure == 'visbeck') then local_config_GM_lat_variable_c2 = .true. local_config_GM_kappa_lat_depth_variable = .false. @@ -950,6 +975,7 @@ subroutine ocn_GM_init(domain, err)!{{{ gmBolusKappa(iEdge) = config_GM_visbeck_max_kappa end do !$omp end do + !$omp end parallel else call mpas_log_write('Invalid choice of config_GM_closure.', MPAS_LOG_CRIT) err = 1 @@ -965,7 +991,8 @@ subroutine ocn_GM_init(domain, err)!{{{ !$omp end do ! Nothing to do, as we just keep the same assignment as above. else if (config_eddying_resolution_taper == 'ramp') then - !$omp do schedule(runtime) + !$omp parallel + !$omp do schedule(runtime) private( avgCellDiameter) do iEdge = 1, nEdges if (dcEdge(iEdge) <= config_eddying_resolution_ramp_min) then gmResolutionTaper(iEdge) = 0.0_RKIND @@ -978,6 +1005,7 @@ subroutine ocn_GM_init(domain, err)!{{{ end if end do !$omp end do + !$omp end parallel else call mpas_log_write('Invalid choice of config_eddying_resolution_taper.', MPAS_LOG_CRIT) err = 1 From 0a3faa663077f2040c0b79172838ccd4b8e1b5c8 Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Thu, 28 May 2020 10:01:48 -0700 Subject: [PATCH 05/16] Remove main loop index from omp private --- .../mpas_ocn_time_integration_rk4.F | 42 ++++++------ .../mpas_ocn_time_integration_split.F | 66 +++++++++---------- src/core_ocean/shared/mpas_ocn_diagnostics.F | 20 +++--- .../shared/mpas_ocn_equation_of_state_jm.F | 12 ++-- src/core_ocean/shared/mpas_ocn_sea_ice.F | 2 +- .../shared/mpas_ocn_surface_land_ice_fluxes.F | 6 +- src/core_ocean/shared/mpas_ocn_thick_ale.F | 2 +- .../shared/mpas_ocn_time_varying_forcing.F | 2 +- .../shared/mpas_ocn_tracer_advection_mono.F | 32 ++++----- .../shared/mpas_ocn_tracer_hmix_del2.F | 2 +- ..._ocn_tracer_short_wave_absorption_jerlov.F | 2 +- ...cn_tracer_short_wave_absorption_variable.F | 2 +- .../mpas_ocn_vel_forcing_surface_stress.F | 2 +- .../shared/mpas_ocn_vel_hadv_coriolis.F | 2 +- .../shared/mpas_ocn_vel_hmix_del4.F | 8 +-- .../shared/mpas_ocn_vel_pressure_grad.F | 4 +- src/core_ocean/shared/mpas_ocn_vmix.F | 6 +- .../shared/mpas_ocn_wetting_drying.F | 2 +- 18 files changed, 107 insertions(+), 107 deletions(-) diff --git a/src/core_ocean/mode_forward/mpas_ocn_time_integration_rk4.F b/src/core_ocean/mode_forward/mpas_ocn_time_integration_rk4.F index 553f0ba8f7..3be4e6ff05 100644 --- a/src/core_ocean/mode_forward/mpas_ocn_time_integration_rk4.F +++ b/src/core_ocean/mode_forward/mpas_ocn_time_integration_rk4.F @@ -304,7 +304,7 @@ subroutine ocn_time_integrator_rk4(domain, dt)!{{{ if (associated(lowFreqDivergenceCur)) then !$omp parallel - !$omp do schedule(runtime) private(iCell) + !$omp do schedule(runtime) do iCell = 1, nCells lowFreqDivergenceNew(:, iCell) = lowFreqDivergenceCur(:, iCell) end do @@ -462,7 +462,7 @@ subroutine ocn_time_integrator_rk4(domain, dt)!{{{ call mpas_pool_get_dimension(block % dimensions, 'nCells', nCells) !$omp parallel - !$omp do schedule(runtime) private(iCell) + !$omp do schedule(runtime) do iCell = 1, nCells highFreqThicknessProvis(:, iCell) = highFreqThicknessCur(:, iCell) + rk_substep_weights(rk_step) & * highFreqThicknessTend(:, iCell) @@ -686,7 +686,7 @@ subroutine ocn_time_integrator_rk4(domain, dt)!{{{ if (config_prescribe_velocity) then !$omp parallel - !$omp do schedule(runtime) private(iEdge) + !$omp do schedule(runtime) do iEdge = 1, nEdges normalVelocityNew(:, iEdge) = normalVelocityCur(:, iEdge) end do @@ -696,7 +696,7 @@ subroutine ocn_time_integrator_rk4(domain, dt)!{{{ if (config_prescribe_thickness) then !$omp parallel - !$omp do schedule(runtime) private(iCell) + !$omp do schedule(runtime) do iCell = 1, nCells layerThicknessNew(:, iCell) = layerThicknessCur(:, iCell) end do @@ -734,7 +734,7 @@ subroutine ocn_time_integrator_rk4(domain, dt)!{{{ ! Accumulating various parameterizations of the transport velocity ! ------------------------------------------------------------------ !$omp parallel - !$omp do schedule(runtime) private(iEdge) + !$omp do schedule(runtime) do iEdge = 1, nEdges normalTransportVelocity(:, iEdge) = normalVelocityNew(:, iEdge) end do @@ -750,7 +750,7 @@ subroutine ocn_time_integrator_rk4(domain, dt)!{{{ if (config_use_GM) then !$omp parallel - !$omp do schedule(runtime) private(iEdge) + !$omp do schedule(runtime) do iEdge = 1, nEdges normalTransportVelocity(:, iEdge) = normalTransportVelocity(:, iEdge) + normalGMBolusVelocity(:, iEdge) end do @@ -773,7 +773,7 @@ subroutine ocn_time_integrator_rk4(domain, dt)!{{{ call mpas_threading_barrier() !$omp parallel - !$omp do schedule(runtime) private(iCell) + !$omp do schedule(runtime) do iCell = 1, nCells surfaceVelocity(indexSurfaceVelocityZonal, iCell) = velocityZonal(1, iCell) surfaceVelocity(indexSurfaceVelocityMeridional, iCell) = velocityMeridional(1, iCell) @@ -1184,7 +1184,7 @@ subroutine ocn_time_integrator_rk4_diagnostic_update(block, dt, rkSubstepWeight, call mpas_threading_barrier() !$omp parallel - !$omp do schedule(runtime) private(k, iEdge) + !$omp do schedule(runtime) private(k) do iEdge = 1, nEdges do k = 1, maxLevelEdgeTop(iEdge) normalVelocityProvis(k, iEdge) = normalVelocityCur(k, iEdge) + rkSubstepWeight & @@ -1194,7 +1194,7 @@ subroutine ocn_time_integrator_rk4_diagnostic_update(block, dt, rkSubstepWeight, end do !$omp end do - !$omp do schedule(runtime) private(k, iCell) + !$omp do schedule(runtime) private(k) do iCell = 1, nCells do k = 1, maxLevelCell(iCell) layerThicknessProvis(k, iCell) = layerThicknessCur(k, iCell) + rkSubstepWeight & @@ -1218,7 +1218,7 @@ subroutine ocn_time_integrator_rk4_diagnostic_update(block, dt, rkSubstepWeight, call mpas_pool_get_array(tracersTendPool, modifiedGroupName, tracersGroupTend) if ( associated(tracersGroupProvis) .and. associated(tracersGroupCur) .and. associated(tracersGroupTend) ) then !$omp parallel - !$omp do schedule(runtime) private(k, iCell) + !$omp do schedule(runtime) private(k) do iCell = 1, nCells do k = 1, maxLevelCell(iCell) tracersGroupProvis(:, k, iCell) = ( layerThicknessCur(k, iCell) * tracersGroupCur(:, k, iCell) & @@ -1236,7 +1236,7 @@ subroutine ocn_time_integrator_rk4_diagnostic_update(block, dt, rkSubstepWeight, if (associated(lowFreqDivergenceCur)) then !$omp parallel - !$omp do schedule(runtime) private(iCell) + !$omp do schedule(runtime) do iCell = 1, nCells lowFreqDivergenceProvis(:, iCell) = lowFreqDivergenceCur(:, iCell) + rkSubstepWeight & * lowFreqDivergenceTend(:, iCell) @@ -1247,7 +1247,7 @@ subroutine ocn_time_integrator_rk4_diagnostic_update(block, dt, rkSubstepWeight, if (config_prescribe_velocity) then !$omp parallel - !$omp do schedule(runtime) private(iEdge) + !$omp do schedule(runtime) do iEdge = 1, nEdges normalVelocityProvis(:, iEdge) = normalVelocityCur(:, iEdge) end do @@ -1257,7 +1257,7 @@ subroutine ocn_time_integrator_rk4_diagnostic_update(block, dt, rkSubstepWeight, if (config_prescribe_thickness) then !$omp parallel - !$omp do schedule(runtime) private(iCell) + !$omp do schedule(runtime) do iCell = 1, nCells layerThicknessProvis(:, iCell) = layerThicknessCur(:, iCell) end do @@ -1273,7 +1273,7 @@ subroutine ocn_time_integrator_rk4_diagnostic_update(block, dt, rkSubstepWeight, ! Accumulating various parametrizations of the transport velocity ! ------------------------------------------------------------------ !$omp parallel - !$omp do schedule(runtime) private(iEdge) + !$omp do schedule(runtime) do iEdge = 1, nEdges normalTransportVelocity(:, iEdge) = normalVelocityProvis(:, iEdge) end do @@ -1289,7 +1289,7 @@ subroutine ocn_time_integrator_rk4_diagnostic_update(block, dt, rkSubstepWeight, if (config_use_GM) then !$omp parallel - !$omp do schedule(runtime) private(iEdge) + !$omp do schedule(runtime) do iEdge = 1, nEdges normalTransportVelocity(:, iEdge) = normalTransportVelocity(:, iEdge) + normalGMBolusVelocity(:,iEdge) end do @@ -1361,7 +1361,7 @@ subroutine ocn_time_integrator_rk4_accumulate_update(block, rkWeight, err)!{{{ call mpas_pool_get_array(meshPool, 'maxLevelCell', maxLevelCell) !$omp parallel - !$omp do schedule(runtime) private(k, iCell) + !$omp do schedule(runtime) private(k) do iCell = 1, nCells do k = 1, maxLevelCell(iCell) layerThicknessNew(k, iCell) = layerThicknessNew(k, iCell) + rkWeight * layerThicknessTend(k, iCell) @@ -1369,7 +1369,7 @@ subroutine ocn_time_integrator_rk4_accumulate_update(block, rkWeight, err)!{{{ end do !$omp end do - !$omp do schedule(runtime) private(iEdge) + !$omp do schedule(runtime) do iEdge = 1, nEdges normalVelocityNew(:, iEdge) = normalVelocityNew(:, iEdge) + rkWeight * normalVelocityTend(:, iEdge) normalVelocityNew(:, iEdge) = normalVelocityNew(:, iEdge) * (1.0_RKIND - wettingVelocity(:, iEdge)) @@ -1390,7 +1390,7 @@ subroutine ocn_time_integrator_rk4_accumulate_update(block, rkWeight, err)!{{{ call mpas_pool_get_array(tracersTendPool, modifiedGroupName, tracersGroupTend) if ( associated(tracersGroupNew) .and. associated(tracersGroupTend) ) then !$omp parallel - !$omp do schedule(runtime) private(k, iCell) + !$omp do schedule(runtime) private(k) do iCell = 1, nCells do k = 1, maxLevelCell(iCell) tracersGroupNew(:, k, iCell) = tracersGroupNew(:, k, iCell) + rkWeight & @@ -1406,7 +1406,7 @@ subroutine ocn_time_integrator_rk4_accumulate_update(block, rkWeight, err)!{{{ if (associated(highFreqThicknessNew)) then !$omp parallel - !$omp do schedule(runtime) private(iCell) + !$omp do schedule(runtime) do iCell = 1, nCells highFreqThicknessNew(:, iCell) = highFreqThicknessNew(:, iCell) + rkWeight * highFreqThicknessTend(:, iCell) end do @@ -1416,7 +1416,7 @@ subroutine ocn_time_integrator_rk4_accumulate_update(block, rkWeight, err)!{{{ if (associated(lowFreqDivergenceNew)) then !$omp parallel - !$omp do schedule(runtime) private(iCell) + !$omp do schedule(runtime) do iCell = 1, nCells lowFreqDivergenceNew(:, iCell) = lowFreqDivergenceNew(:, iCell) + rkWeight * lowFreqDivergenceTend(:, iCell) end do @@ -1483,7 +1483,7 @@ subroutine ocn_time_integrator_rk4_cleanup(block, dt, err)!{{{ call mpas_pool_get_array(tracersPool, groupItr % memberName, tracersGroupNew, 2) if ( associated(tracersGroupNew) ) then !$omp parallel - !$omp do schedule(runtime) private(k, iCell) + !$omp do schedule(runtime) private(k) do iCell = 1, nCells do k = 1, maxLevelCell(iCell) tracersGroupNew(:, k, iCell) = tracersGroupNew(:, k, iCell) / layerThicknessNew(k, iCell) diff --git a/src/core_ocean/mode_forward/mpas_ocn_time_integration_split.F b/src/core_ocean/mode_forward/mpas_ocn_time_integration_split.F index 1354d1a958..0d00b4c42a 100644 --- a/src/core_ocean/mode_forward/mpas_ocn_time_integration_split.F +++ b/src/core_ocean/mode_forward/mpas_ocn_time_integration_split.F @@ -323,7 +323,7 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ ! Initialize * variables that are used to compute baroclinic tendencies below. !$omp parallel - !$omp do schedule(runtime) private(k, iEdge) + !$omp do schedule(runtime) private(k) do iEdge = 1, nEdges do k = 1, nVertLevels !maxLevelEdgeTop % array(iEdge) @@ -342,7 +342,7 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ end do !$omp end do - !$omp do schedule(runtime) private(k, iCell) + !$omp do schedule(runtime) private(k) do iCell = 1, nCells sshNew(iCell) = sshCur(iCell) do k = 1, maxLevelCell(iCell) @@ -362,7 +362,7 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ if ( associated(tracersGroupCur) .and. associated(tracersGroupNew) ) then !$omp parallel - !$omp do schedule(runtime) private(k, iCell) + !$omp do schedule(runtime) private(k) do iCell = 1, nCells do k = 1, maxLevelCell(iCell) tracersGroupNew(:,k,iCell) = tracersGroupCur(:,k,iCell) @@ -377,7 +377,7 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ if (associated(highFreqThicknessNew)) then !$omp parallel - !$omp do schedule(runtime) private(iCell) + !$omp do schedule(runtime) do iCell = 1, nCells highFreqThicknessNew(:, iCell) = highFreqThicknessCur(:, iCell) end do @@ -387,7 +387,7 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ if (associated(lowFreqDivergenceNew)) then !$omp parallel - !$omp do schedule(runtime) private(iCell) + !$omp do schedule(runtime) do iCell = 1, nCells lowFreqDivergenceNew(:, iCell) = lowFreqDivergenceCur(:, iCell) end do @@ -486,7 +486,7 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ nCells = nCellsPtr !$omp parallel - !$omp do schedule(runtime) private(k, iCell) + !$omp do schedule(runtime) private(k) do iCell = 1, nCells do k = 1, maxLevelCell(iCell) ! this is h^{hf}_{n+1} @@ -581,7 +581,7 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ !$omp parallel !$omp do schedule(runtime) & - !$omp private(iEdge, cell1, cell2, uTemp, k, normalThicknessFluxSum, thicknessSum) + !$omp private(cell1, cell2, uTemp, k, normalThicknessFluxSum, thicknessSum) do iEdge = 1, nEdges cell1 = cellsOnEdge(1,iEdge) cell2 = cellsOnEdge(2,iEdge) @@ -688,7 +688,7 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ ! uNew=normalBaroclinicVelocityNew !$omp parallel - !$omp do schedule(runtime) private(k, iEdge) + !$omp do schedule(runtime) private(k) do iEdge = 1, nEdges normalBarotropicVelocityNew(iEdge) = 0.0_RKIND do k = 1, nVertLevels @@ -739,7 +739,7 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ if (config_filter_btr_mode) then !$omp parallel - !$omp do schedule(runtime) private(iEdge) + !$omp do schedule(runtime) do iEdge = 1, nEdges barotropicForcing(iEdge) = 0.0_RKIND end do @@ -748,14 +748,14 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ endif !$omp parallel - !$omp do schedule(runtime) private(iCell) + !$omp do schedule(runtime) do iCell = 1, nCells ! sshSubcycleOld = sshOld sshSubcycleCur(iCell) = sshCur(iCell) end do !$omp end do - !$omp do schedule(runtime) private(iEdge) + !$omp do schedule(runtime) do iEdge = 1, nEdges ! normalBarotropicVelocitySubcycleOld = normalBarotropicVelocityOld @@ -855,7 +855,7 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ !$omp parallel !$omp do schedule(runtime) & - !$omp private(iEdge, temp_mask, cell1, cell2, CoriolisTerm, i, eoe) + !$omp private(temp_mask, cell1, cell2, CoriolisTerm, i, eoe) do iEdge = 1, nEdges temp_mask = edgeMask(1, iEdge) @@ -940,7 +940,7 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ !$omp parallel !$omp do schedule(runtime) & - !$omp private(iCell, i, iEdge, cell1, cell2, sshEdge, thicknessSum, flux) + !$omp private(i, iEdge, cell1, cell2, sshEdge, thicknessSum, flux) do iCell = 1, nCells sshTend(iCell) = 0.0_RKIND do i = 1, nEdgesOnCell(iCell) @@ -995,7 +995,7 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ !$omp parallel !$omp do schedule(runtime) & - !$omp private(iEdge, cell1, cell2, sshEdge, thicknessSum, flux) + !$omp private(cell1, cell2, sshEdge, thicknessSum, flux) do iEdge = 1, nEdges cell1 = cellsOnEdge(1,iEdge) cell2 = cellsOnEdge(2,iEdge) @@ -1090,7 +1090,7 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ nEdges = nEdgesArray( min(edgeHaloComputeCounter + 1, config_num_halos + 1) ) !$omp parallel - !$omp do schedule(runtime) private(iEdge) + !$omp do schedule(runtime) do iEdge = 1, nEdges+1 btrvel_temp(iEdge) = normalBarotropicVelocitySubcycleNew(iEdge) end do @@ -1101,7 +1101,7 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ !$omp parallel !$omp do schedule(runtime) & - !$omp private(iEdge, temp_mask, cell1, cell2, coriolisTerm, eoe, sshCell1, sshCell2) + !$omp private(temp_mask, cell1, cell2, coriolisTerm, eoe, sshCell1, sshCell2) do iEdge = 1, nEdges ! asarje: added to avoid redundant computations based on mask @@ -1300,7 +1300,7 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ nEdges = nEdgesPtr !$omp parallel - !$omp do schedule(runtime) private(iEdge) + !$omp do schedule(runtime) do iEdge = 1, nEdges normalBarotropicVelocityNew(iEdge) = normalBarotropicVelocityNew(iEdge) & + normalBarotropicVelocitySubcycleNew(iEdge) @@ -1345,7 +1345,7 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ nEdges = nEdgesArray(1) !$omp parallel - !$omp do schedule(runtime) private(iEdge) + !$omp do schedule(runtime) do iEdge = 1, nEdges barotropicThicknessFlux(iEdge) = barotropicThicknessFlux(iEdge) & / (nBtrSubcycles * config_btr_subcycle_loop_factor) @@ -1421,7 +1421,7 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ !$omp parallel !$omp do schedule(runtime) & - !$omp private(iEdge, uTemp, normalThicknessFluxSum, thicknessSum, k, & + !$omp private(uTemp, normalThicknessFluxSum, thicknessSum, k, & !$omp normalVelocityCorrection) do iEdge = 1, nEdges @@ -1632,7 +1632,7 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ ! then all the tracers needed the last time through. !$omp parallel - !$omp do schedule(runtime) private(iCell, k, temp_h, temp, i) + !$omp do schedule(runtime) private(k, temp_h, temp, i) do iCell = 1, nCells ! sshNew is a pointer, defined above. do k = 1, maxLevelCell(iCell) @@ -1658,7 +1658,7 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ if (config_use_freq_filtered_thickness) then !$omp parallel - !$omp do schedule(runtime) private(iCell, k, temp) + !$omp do schedule(runtime) private(k, temp) do iCell = 1, nCells do k = 1, maxLevelCell(iCell) @@ -1680,7 +1680,7 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ end if !$omp parallel - !$omp do schedule(runtime) private(k, iEdge) + !$omp do schedule(runtime) private(k) do iEdge = 1, nEdges do k = 1, nVertLevels @@ -1709,7 +1709,7 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ elseif (split_explicit_step == config_n_ts_iter) then !$omp parallel - !$omp do schedule(runtime) private(k, iCell) + !$omp do schedule(runtime) private(k) do iCell = 1, nCells do k = 1, maxLevelCell(iCell) ! this is h_{n+1} @@ -1733,7 +1733,7 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ call mpas_pool_get_array(diagnosticsPool, 'layerThicknessEdge', layerThicknessEdge) !$omp parallel - !$omp do schedule(runtime) private(k, iEdge) + !$omp do schedule(runtime) private(k) do iEdge = 1, nEdges do k= 1, maxLevelEdgeTop(iEdge) activeTracerHorizontalAdvectionEdgeFlux(:,k,iEdge) = & @@ -1743,7 +1743,7 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ enddo !$omp end do - !$omp do schedule(runtime) private(k, iCell) + !$omp do schedule(runtime) private(k) do iCell = 1, nCells do k= 1, maxLevelCell(iCell) activeTracerHorizontalAdvectionTendency(:,k,iCell) = & @@ -1789,7 +1789,7 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ call mpas_pool_get_array(tracersTendPool, modifiedGroupName, tracersGroupTend) !$omp parallel - !$omp do schedule(runtime) private(k, iCell) + !$omp do schedule(runtime) private(k) do iCell = 1, nCells do k = 1, maxLevelCell(iCell) tracersGroupNew(:,k,iCell) = (tracersGroupCur(:,k,iCell) * layerThicknessCur(k,iCell) + dt & @@ -1802,7 +1802,7 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ ! limit salinity in separate loop if ( trim(groupItr % memberName) == 'activeTracers' ) then !$omp parallel - !$omp do schedule(runtime) private(k, iCell) + !$omp do schedule(runtime) private(k) do iCell = 1, nCells do k = 1, maxLevelCell(iCell) tracersGroupNew(indexSalinity,k,iCell) = max(0.001_RKIND, tracersGroupNew(indexSalinity,k,iCell)) @@ -1818,7 +1818,7 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ call mpas_pool_get_array(meshPool, 'lonCell', lonCell) if (config_reset_debugTracers_near_surface) then !$omp parallel - !$omp do schedule(runtime) private(iCell, k, lat) + !$omp do schedule(runtime) private(k, lat) do iCell = 1, nCells ! Reset tracer1 to 2 in top n layers @@ -1872,7 +1872,7 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ if (config_use_freq_filtered_thickness) then !$omp parallel - !$omp do schedule(runtime) private(k, iCell) + !$omp do schedule(runtime) private(k) do iCell = 1, nCells do k = 1, maxLevelCell(iCell) @@ -1896,7 +1896,7 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ ! so normalBaroclinicVelocity does not have to be recomputed here. !$omp parallel - !$omp do schedule(runtime) private(k, iEdge) + !$omp do schedule(runtime) private(k) do iEdge = 1, nEdges do k = 1, maxLevelEdgeTop(iEdge) normalVelocityNew(k,iEdge) = normalBarotropicVelocityNew(iEdge) + 2 * normalBaroclinicVelocityNew(k,iEdge) & @@ -2032,7 +2032,7 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ if (config_prescribe_velocity) then !$omp parallel - !$omp do schedule(runtime) private(iEdge) + !$omp do schedule(runtime) do iEdge = 1, nEdges normalVelocityNew(:, iEdge) = normalVelocityCur(:, iEdge) end do @@ -2042,7 +2042,7 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ if (config_prescribe_thickness) then !$omp parallel - !$omp do schedule(runtime) private(iCell) + !$omp do schedule(runtime) do iCell = 1, nCells layerThicknessNew(:, iCell) = layerThicknessCur(:, iCell) end do @@ -2076,7 +2076,7 @@ subroutine ocn_time_integrator_split(domain, dt)!{{{ call mpas_timer_stop('se final mpas reconstruct') !$omp parallel - !$omp do schedule(runtime) private(iCell) + !$omp do schedule(runtime) do iCell = 1, nCells surfaceVelocity(indexSurfaceVelocityZonal, iCell) = velocityZonal(1, iCell) surfaceVelocity(indexSurfaceVelocityMeridional, iCell) = velocityMeridional(1, iCell) diff --git a/src/core_ocean/shared/mpas_ocn_diagnostics.F b/src/core_ocean/shared/mpas_ocn_diagnostics.F index 6abf089edb..b39c2d30ac 100644 --- a/src/core_ocean/shared/mpas_ocn_diagnostics.F +++ b/src/core_ocean/shared/mpas_ocn_diagnostics.F @@ -316,7 +316,7 @@ subroutine ocn_diagnostic_solve(dt, statePool, forcingPool, meshPool, diagnostic if (.not. config_use_wetting_drying .or. (config_use_wetting_drying .and. (trim(config_thickness_flux_type) .eq. 'centered'))) then !$omp parallel - !$omp do schedule(runtime) private(cell1, cell2, k, iEdge) + !$omp do schedule(runtime) private(cell1, cell2, k) do iEdge = 1, nEdges ! initialize layerThicknessEdge to avoid divide by zero and NaN problems. layerThicknessEdge(:, iEdge) = -1.0e34_RKIND @@ -333,7 +333,7 @@ subroutine ocn_diagnostic_solve(dt, statePool, forcingPool, meshPool, diagnostic if (config_use_wetting_drying .and. (trim(config_thickness_flux_type) .ne. 'centered')) then if ( trim(config_thickness_flux_type) .eq. 'upwind') then !$omp parallel - !$omp do schedule(runtime) private(cell1, cell2, k, iEdge) + !$omp do schedule(runtime) private(cell1, cell2, k) do iEdge = 1, nEdges ! initialize layerThicknessEdge to avoid divide by zero and NaN problems. layerThicknessEdge(:, iEdge) = -1.0e34_RKIND @@ -376,7 +376,7 @@ subroutine ocn_diagnostic_solve(dt, statePool, forcingPool, meshPool, diagnostic nCells = nCellsArray( 1 ) !$omp parallel - !$omp do schedule(runtime) private(invAreaCell1, i, j, k, iVertex, iCell) + !$omp do schedule(runtime) private(invAreaCell1, i, j, k, iVertex) do iCell = 1, nCells relativeVorticityCell(:,iCell) = 0.0_RKIND invAreaCell1 = 1.0_RKIND / areaCell(iCell) @@ -449,7 +449,7 @@ subroutine ocn_diagnostic_solve(dt, statePool, forcingPool, meshPool, diagnostic nEdges = nEdgesArray( 2 ) !$omp parallel - !$omp do schedule(runtime) private(iEdge, i, eoe, weightsOnEdge_temp, k) + !$omp do schedule(runtime) private(i, eoe, weightsOnEdge_temp, k) do iEdge = 1, nEdges tangentialVelocity(:, iEdge) = 0.0_RKIND ! Compute v (tangential) velocities @@ -707,7 +707,7 @@ subroutine ocn_diagnostic_solve(dt, statePool, forcingPool, meshPool, diagnostic allocate(pTop(nVertLevels)) !$omp parallel - !$omp do schedule(runtime) private(iCell, pTop, k) + !$omp do schedule(runtime) private(pTop, k) do iCell = 1, nCells ! assume atmospheric pressure at the surface is zero for now. @@ -844,7 +844,7 @@ subroutine ocn_diagnostic_solve(dt, statePool, forcingPool, meshPool, diagnostic nCells = nCellsArray( 1 ) !$omp parallel - !$omp do schedule(runtime) private(iCell, surfaceLayerDepth, sumSurfaceLayer, k, rSurfaceLayer) + !$omp do schedule(runtime) private(surfaceLayerDepth, sumSurfaceLayer, k, rSurfaceLayer) do iCell=1,nCells surfaceLayerDepth = config_cvmix_kpp_surface_layer_averaging sumSurfaceLayer=0.0_RKIND @@ -1258,7 +1258,7 @@ subroutine ocn_filter_btr_mode_vel(statePool, diagnosticsPool, meshPool, timeLev call mpas_pool_get_dimension(meshPool, 'nEdges', nEdges) !$omp parallel - !$omp do schedule(runtime) private(iEdge, normalThicknessFluxSum, thicknessSum, k, vertSum) + !$omp do schedule(runtime) private(normalThicknessFluxSum, thicknessSum, k, vertSum) do iEdge = 1, nEdges ! thicknessSum is initialized outside the loop because on land boundaries @@ -1329,7 +1329,7 @@ subroutine ocn_filter_btr_mode_tend_vel(tendPool, statePool, diagnosticsPool, me call mpas_pool_get_dimension(meshPool, 'nEdges', nEdges) !$omp parallel - !$omp do schedule(runtime) private(iEdge, normalThicknessFluxSum, thicknessSum, k, vertSum) + !$omp do schedule(runtime) private(normalThicknessFluxSum, thicknessSum, k, vertSum) do iEdge = 1, nEdges ! thicknessSum is initialized outside the loop because on land boundaries @@ -1539,7 +1539,7 @@ subroutine ocn_compute_KPP_input_fields(statePool, forcingPool, meshPool, diagno !$omp parallel !$omp do schedule(runtime) & - !$omp private(iCell, invAreaCell, fracAbsorbed, fracAbsorbedRunoff, deltaVelocitySquared, i, & + !$omp private(invAreaCell, fracAbsorbed, fracAbsorbedRunoff, deltaVelocitySquared, i, & !$omp iEdge, factor, delU2) do iCell = 1, nCells ! compute surface buoyancy forcing based on surface fluxes of mass, temperature, salinity and frazil @@ -1785,7 +1785,7 @@ subroutine ocn_compute_land_ice_flux_input_fields(meshPool, statePool, & !$omp end do ! average temperature and salinity over horizontal neighbors and the sub-ice-shelf boundary layer - !$omp do schedule(runtime) private(iCell, blThickness, iLevel, dz) + !$omp do schedule(runtime) private(blThickness, iLevel, dz) do iCell = 1, nCells blThickness = 0.0_RKIND blTempScratch(iCell) = 0.0_RKIND diff --git a/src/core_ocean/shared/mpas_ocn_equation_of_state_jm.F b/src/core_ocean/shared/mpas_ocn_equation_of_state_jm.F index 9006e294f1..751a89f001 100644 --- a/src/core_ocean/shared/mpas_ocn_equation_of_state_jm.F +++ b/src/core_ocean/shared/mpas_ocn_equation_of_state_jm.F @@ -305,7 +305,7 @@ subroutine ocn_equation_of_state_jm_density_only(nVertLevels, & if (displacementType == 'surfaceDisplaced') then !$omp parallel - !$omp do schedule(runtime) private(iCell, k, tq, sq) + !$omp do schedule(runtime) private(k, tq, sq) do iCell=1,nCells do k=1,nVertLevels tq = min(tracersSurfaceLayerValue(indexT,iCell), & @@ -322,7 +322,7 @@ subroutine ocn_equation_of_state_jm_density_only(nVertLevels, & else !$omp parallel - !$omp do schedule(runtime) private(iCell, k, tq, sq) + !$omp do schedule(runtime) private(k, tq, sq) do iCell=1,nCells do k = 1, nVertLevels tq = min(tracers(indexT,k,iCell), ocnEqStateTmax) @@ -343,7 +343,7 @@ subroutine ocn_equation_of_state_jm_density_only(nVertLevels, & #else !$omp parallel !$omp do schedule(runtime) & - !$omp private(iCell, k, sq, tq, sqr, t2, work1, work2, rhosfc, work3, work4, bulkMod) + !$omp private(k, sq, tq, sqr, t2, work1, work2, rhosfc, work3, work4, bulkMod) #endif do iCell=1,nCells do k=1,nVertLevels @@ -576,7 +576,7 @@ subroutine ocn_equation_of_state_jm_density_exp(nVertLevels, & if (displacementType == 'surfaceDisplaced') then !$omp parallel - !$omp do schedule(runtime) private(iCell, k, tq, sq) + !$omp do schedule(runtime) private(k, tq, sq) do iCell=1,nCells do k=1,nVertLevels tq = min(tracersSurfaceLayerValue(indexT,iCell), & @@ -593,7 +593,7 @@ subroutine ocn_equation_of_state_jm_density_exp(nVertLevels, & else !$omp parallel - !$omp do schedule(runtime) private(iCell, k, tq, sq) + !$omp do schedule(runtime) private(k, tq, sq) do iCell=1,nCells do k = 1, nVertLevels tq = min(tracers(indexT,k,iCell), ocnEqStateTmax) @@ -618,7 +618,7 @@ subroutine ocn_equation_of_state_jm_density_exp(nVertLevels, & #else !$omp parallel !$omp do schedule(runtime) & - !$omp private(iCell, k, sq, tq, sqr, t2, work1, work2, rhosfc, work3, work4, bulkMod, & + !$omp private(k, sq, tq, sqr, t2, work1, work2, rhosfc, work3, work4, bulkMod, & !$omp denomk, drdt0, dkdt, drhodt, drds0, dkds, drhods) #endif do iCell=1,nCells diff --git a/src/core_ocean/shared/mpas_ocn_sea_ice.F b/src/core_ocean/shared/mpas_ocn_sea_ice.F index 432ea6b5a7..c04f6afcdc 100644 --- a/src/core_ocean/shared/mpas_ocn_sea_ice.F +++ b/src/core_ocean/shared/mpas_ocn_sea_ice.F @@ -138,7 +138,7 @@ subroutine ocn_sea_ice_formation(meshPool, indexTemperature, indexSalinity, laye !$omp parallel !$omp do schedule(runtime) & - !$omp private(iCell, maxLevel, netEnergyChange, k, freezingTemp, availableEnergyChange, & + !$omp private(maxLevel, netEnergyChange, k, freezingTemp, availableEnergyChange, & !$omp energyChange, temperatureChange, thicknessChange, iceThicknessChange, iTracer) do iCell = 1, nCellsSolve ! Check performance of these two loop definitions ! do iCell = nCellsSolve, 1, -1 diff --git a/src/core_ocean/shared/mpas_ocn_surface_land_ice_fluxes.F b/src/core_ocean/shared/mpas_ocn_surface_land_ice_fluxes.F index b6f661851e..33cd0e7c28 100644 --- a/src/core_ocean/shared/mpas_ocn_surface_land_ice_fluxes.F +++ b/src/core_ocean/shared/mpas_ocn_surface_land_ice_fluxes.F @@ -495,7 +495,7 @@ subroutine ocn_surface_land_ice_fluxes_build_arrays(meshPool, diagnosticsPool, & if(isomipOn) then !$omp parallel - !$omp do schedule(runtime) private(iCell, freshwaterFlux, heatFlux) + !$omp do schedule(runtime) private(freshwaterFlux, heatFlux) do iCell = 1, nCells if (landIceMask(iCell) == 0) cycle @@ -780,7 +780,7 @@ subroutine compute_melt_fluxes( & Tlatent = latent_heat_fusion_mks/cp_sw !$omp parallel - !$omp do schedule(runtime) private(iCell, T0, dTf_dS, transferVelocityRatio, a, b, c) + !$omp do schedule(runtime) private(T0, dTf_dS, transferVelocityRatio, a, b, c) do iCell = 1, nCells if (mask(iCell) == 0) cycle @@ -928,7 +928,7 @@ subroutine compute_HJ99_melt_fluxes( & cpRatio = cp_land_ice/cp_sw !$omp parallel !$omp do schedule(runtime) reduction(+:err) & - !$omp private(iCell, T0, dTf_dS, transferVelocityRatio, Tlatent, eta, TlatentStar, a, b, c) + !$omp private(T0, dTf_dS, transferVelocityRatio, Tlatent, eta, TlatentStar, a, b, c) do iCell = 1, nCells if (mask(iCell) == 0) cycle diff --git a/src/core_ocean/shared/mpas_ocn_thick_ale.F b/src/core_ocean/shared/mpas_ocn_thick_ale.F index 1599bdc18c..6440b4c844 100644 --- a/src/core_ocean/shared/mpas_ocn_thick_ale.F +++ b/src/core_ocean/shared/mpas_ocn_thick_ale.F @@ -211,7 +211,7 @@ subroutine ocn_ALE_thickness(meshPool, verticalMeshPool, SSH, ALE_thickness, err !$omp parallel !$omp do schedule(runtime) & - !$omp private(iCell, kMax, prelim_ALE_Thickness, remainder, k, newThickness, & + !$omp private(kMax, prelim_ALE_Thickness, remainder, k, newThickness, & !$omp min_ALE_thickness_down, min_ALE_thickness_up) do iCell = 1, nCells kMax = maxLevelCell(iCell) diff --git a/src/core_ocean/shared/mpas_ocn_time_varying_forcing.F b/src/core_ocean/shared/mpas_ocn_time_varying_forcing.F index 8142e271ce..ac0bf333a1 100644 --- a/src/core_ocean/shared/mpas_ocn_time_varying_forcing.F +++ b/src/core_ocean/shared/mpas_ocn_time_varying_forcing.F @@ -367,7 +367,7 @@ subroutine post_atmospheric_forcing(block)!{{{ ramp = tanh((2.0_RKIND*daysSinceStartOfSim)/config_time_varying_atmospheric_forcing_ramp) !$omp parallel - !$omp do schedule(runtime) private(iCell, windStressCoefficient) + !$omp do schedule(runtime) private(windStressCoefficient) do iCell = 1, nCells windSpeedU(iCell) = ramp*windSpeedU(iCell) diff --git a/src/core_ocean/shared/mpas_ocn_tracer_advection_mono.F b/src/core_ocean/shared/mpas_ocn_tracer_advection_mono.F index bfaf5196f3..06fdea1128 100644 --- a/src/core_ocean/shared/mpas_ocn_tracer_advection_mono.F +++ b/src/core_ocean/shared/mpas_ocn_tracer_advection_mono.F @@ -249,7 +249,7 @@ subroutine ocn_tracer_advection_mono_tend(tend, tracers, layerThickness, & ! See notes in commit 2cd4a89d. !$omp parallel - !$omp do schedule(runtime) private(iCell, invAreaCell1, kmax, k, i, iEdge, signedFactor) + !$omp do schedule(runtime) private(invAreaCell1, kmax, k, i, iEdge, signedFactor) do iCell = 1, nCells invAreaCell1 = dt/areaCell(iCell) kmax = maxLevelCell(iCell) @@ -312,7 +312,7 @@ subroutine ocn_tracer_advection_mono_tend(tend, tracers, layerThickness, & ! surrounding cells for later limiting. !$omp parallel - !$omp do schedule(runtime) private(iCell, k, i, cell2, kmax) + !$omp do schedule(runtime) private(k, i, cell2, kmax) do iCell = 1, nCells do k=1, maxLevelCell(iCell) tracerMin(k,iCell) = tracerCur(k,iCell) @@ -340,7 +340,7 @@ subroutine ocn_tracer_advection_mono_tend(tend, tracers, layerThickness, & !$omp parallel !$omp do schedule(runtime) & - !$omp private(iEdge, cell1, cell2, k, wgtTmp, sgnTmp, flxTmp, i, iCell, coef1, coef3, & + !$omp private(cell1, cell2, k, wgtTmp, sgnTmp, flxTmp, i, iCell, coef1, coef3, & !$omp tracerWeight) do iEdge = 1, nEdges cell1 = cellsOnEdge(1, iEdge) @@ -418,7 +418,7 @@ subroutine ocn_tracer_advection_mono_tend(tend, tracers, layerThickness, & !$omp parallel !$omp do schedule(runtime) & - !$omp private(iCell, invAreaCell1, i, iEdge, cell1, cell2, signedFactor, k, & + !$omp private(invAreaCell1, i, iEdge, cell1, cell2, signedFactor, k, & !$omp tracerUpwindNew, tracerMinNew, tracerMaxNew, scaleFactor) do iCell = 1, nCells invAreaCell1 = 1.0_RKIND / areaCell(iCell) @@ -536,7 +536,7 @@ subroutine ocn_tracer_advection_mono_tend(tend, tracers, layerThickness, & if (computeBudgets) then !$omp parallel - !$omp do schedule(runtime) private(iEdge, k) + !$omp do schedule(runtime) private(k) do iEdge = 1,nEdgesArray( 2 ) do k = 1,maxLevelEdgeTop(iEdge) ! Save u*h*T flux on edge for analysis. This variable will be @@ -547,7 +547,7 @@ subroutine ocn_tracer_advection_mono_tend(tend, tracers, layerThickness, & enddo !$omp end do - !$omp do schedule(runtime) private(iCell, k) + !$omp do schedule(runtime) private(k) do iCell = 1, nCellsArray( 1 ) do k = 1, maxLevelCell(iCell) activeTracerHorizontalAdvectionTendency(iTracer,k,iCell) = & @@ -564,7 +564,7 @@ subroutine ocn_tracer_advection_mono_tend(tend, tracers, layerThickness, & ! Check tracer values against local min,max to detect ! non-monotone values and write warning if found !$omp parallel - !$omp do schedule(runtime) private(iCell, k) + !$omp do schedule(runtime) private(k) do iCell = 1, nCells do k = 1, maxLevelCell(iCell) if(tracerCur(k,iCell) < tracerMin(k, iCell)-eps) then @@ -626,7 +626,7 @@ subroutine ocn_tracer_advection_mono_tend(tend, tracers, layerThickness, & ! Determine bounds on tracerCur from neighbor values for limiting !$omp parallel - !$omp do schedule(runtime) private(iCell, kmax, k) + !$omp do schedule(runtime) private(kmax, k) do iCell = 1, nCells kmax = maxLevelCell(iCell) @@ -659,7 +659,7 @@ subroutine ocn_tracer_advection_mono_tend(tend, tracers, layerThickness, & case (vertOrder4) !$omp parallel - !$omp do schedule(runtime) private(iCell, kmax, k) + !$omp do schedule(runtime) private(kmax, k) do iCell = 1, nCells kmax = maxLevelCell(iCell) do k=3,kmax-1 @@ -676,7 +676,7 @@ subroutine ocn_tracer_advection_mono_tend(tend, tracers, layerThickness, & case (vertOrder3) !$omp parallel - !$omp do schedule(runtime) private(iCell, kmax, k) + !$omp do schedule(runtime) private(kmax, k) do iCell = 1, nCells kmax = maxLevelCell(iCell) do k=3,kmax-1 @@ -697,7 +697,7 @@ subroutine ocn_tracer_advection_mono_tend(tend, tracers, layerThickness, & case (vertOrder2) !$omp parallel - !$omp do schedule(runtime) private(iCell, kmax, k, verticalWeightK, verticalWeightKm1) + !$omp do schedule(runtime) private(kmax, k, verticalWeightK, verticalWeightKm1) do iCell = 1, nCells kmax = maxLevelCell(iCell) do k=3,kmax-1 @@ -720,7 +720,7 @@ subroutine ocn_tracer_advection_mono_tend(tend, tracers, layerThickness, & ! Remove low order flux from the high order flux. ! Store left over high order flux in highOrderFlx array. !$omp parallel - !$omp do schedule(runtime) private(iCell, kmax, verticalWeightK, verticalWeightKm1, k) + !$omp do schedule(runtime) private(kmax, verticalWeightK, verticalWeightKm1, k) do iCell = 1, nCells kmax = maxLevelCell(iCell) ! Next-to-top cell in column is second-order @@ -781,7 +781,7 @@ subroutine ocn_tracer_advection_mono_tend(tend, tracers, layerThickness, & nCells = nCellsArray( 2 ) !$omp parallel !$omp do schedule(runtime) & - !$omp private(iCell, k, tracerMinNew, tracerMaxNew, tracerUpwindNew, scaleFactor) + !$omp private(k, tracerMinNew, tracerMaxNew, tracerUpwindNew, scaleFactor) do iCell = 1, nCells ! Build the scale factors to limit flux for FCT @@ -820,7 +820,7 @@ subroutine ocn_tracer_advection_mono_tend(tend, tracers, layerThickness, & ! Accumulate the scaled high order vertical tendencies ! and the upwind tendencies !$omp parallel - !$omp do schedule(runtime) private(iCell, kmax, k, flux) + !$omp do schedule(runtime) private(kmax, k, flux) do iCell = 1, nCells kmax = maxLevelCell(iCell) ! rescale the high order vertical flux @@ -854,7 +854,7 @@ subroutine ocn_tracer_advection_mono_tend(tend, tracers, layerThickness, & #endif if (computeBudgets) then !$omp parallel - !$omp do schedule(runtime) private(iCell, k) + !$omp do schedule(runtime) private(k) do iCell = 1, nCells do k = 2, maxLevelCell(iCell) activeTracerVerticalAdvectionTopFlux(iTracer,k,iCell) = & @@ -873,7 +873,7 @@ subroutine ocn_tracer_advection_mono_tend(tend, tracers, layerThickness, & nCells = nCellsArray( 1 ) ! Check for monotonicity of new tracer value !$omp parallel - !$omp do schedule(runtime) private(iCell, k, tracerNew) + !$omp do schedule(runtime) private(k, tracerNew) do iCell = 1, nCells do k = 1, maxLevelCell(iCell) ! workTend on the RHS is the total vertical advection tendency diff --git a/src/core_ocean/shared/mpas_ocn_tracer_hmix_del2.F b/src/core_ocean/shared/mpas_ocn_tracer_hmix_del2.F index 4b6eafd199..1cb65b179e 100644 --- a/src/core_ocean/shared/mpas_ocn_tracer_hmix_del2.F +++ b/src/core_ocean/shared/mpas_ocn_tracer_hmix_del2.F @@ -155,7 +155,7 @@ subroutine ocn_tracer_hmix_del2_tend(meshPool, layerThicknessEdge, tracers, tend ! !$omp parallel !$omp do schedule(runtime) & - !$omp private(iCell, invAreaCell, i, iEdge, cell1, cell2, r_tmp, k, iTracer, & + !$omp private(invAreaCell, i, iEdge, cell1, cell2, r_tmp, k, iTracer, & !$omp tracer_turb_flux, flux) do iCell = 1, nCells invAreaCell = 1.0_RKIND / areaCell(iCell) diff --git a/src/core_ocean/shared/mpas_ocn_tracer_short_wave_absorption_jerlov.F b/src/core_ocean/shared/mpas_ocn_tracer_short_wave_absorption_jerlov.F index 70470557a5..e1d9775796 100644 --- a/src/core_ocean/shared/mpas_ocn_tracer_short_wave_absorption_jerlov.F +++ b/src/core_ocean/shared/mpas_ocn_tracer_short_wave_absorption_jerlov.F @@ -145,7 +145,7 @@ subroutine ocn_tracer_short_wave_absorption_jerlov_tend(meshPool, forcingPool, i nCells = nCellsArray( 3 ) !$omp parallel - !$omp do schedule(runtime) private(iCell, depth, k, weights, depLev) + !$omp do schedule(runtime) private(depth, k, weights, depLev) do iCell = 1, nCells depth = 0.0_RKIND do k = 1, maxLevelCell(iCell) diff --git a/src/core_ocean/shared/mpas_ocn_tracer_short_wave_absorption_variable.F b/src/core_ocean/shared/mpas_ocn_tracer_short_wave_absorption_variable.F index 56e2c6bac2..e48f85ec07 100644 --- a/src/core_ocean/shared/mpas_ocn_tracer_short_wave_absorption_variable.F +++ b/src/core_ocean/shared/mpas_ocn_tracer_short_wave_absorption_variable.F @@ -154,7 +154,7 @@ subroutine ocn_tracer_short_wave_absorption_variable_tend(meshPool, swForcingPoo nCells = nCellsArray( 3 ) !$omp parallel - !$omp do schedule(runtime) private(iCell, depth, cloudRatio, k, depLev) + !$omp do schedule(runtime) private(depth, cloudRatio, k, depLev) do iCell = 1, nCells depth = 0.0_RKIND cloudRatio = min(1.0_RKIND, 1.0_RKIND - penetrativeTemperatureFlux(iCell)/(hflux_factor*(1.0E-15_RKIND + & diff --git a/src/core_ocean/shared/mpas_ocn_vel_forcing_surface_stress.F b/src/core_ocean/shared/mpas_ocn_vel_forcing_surface_stress.F index 36f4a5b0a2..3b598c6ad9 100644 --- a/src/core_ocean/shared/mpas_ocn_vel_forcing_surface_stress.F +++ b/src/core_ocean/shared/mpas_ocn_vel_forcing_surface_stress.F @@ -146,7 +146,7 @@ subroutine ocn_vel_forcing_surface_stress_tend(meshPool, surfaceFluxAttenuationC !$omp parallel !$omp do schedule(runtime) & - !$omp private(iEdge, zTop, cell1, cell2, attenuationCoeff, transmissionCoeffTop, & + !$omp private(zTop, cell1, cell2, attenuationCoeff, transmissionCoeffTop, & !$omp remainingStress, k, zBot) do iEdge = 1, nEdges zTop = 0.0_RKIND diff --git a/src/core_ocean/shared/mpas_ocn_vel_hadv_coriolis.F b/src/core_ocean/shared/mpas_ocn_vel_hadv_coriolis.F index f7b04ef06e..3511c8c3a7 100644 --- a/src/core_ocean/shared/mpas_ocn_vel_hadv_coriolis.F +++ b/src/core_ocean/shared/mpas_ocn_vel_hadv_coriolis.F @@ -151,7 +151,7 @@ subroutine ocn_vel_hadv_coriolis_tend(meshPool, normalizedRelativeVorticityEdge, !$omp parallel !$omp do schedule(runtime) & - !$omp private(iEdge, cell1, cell2, invLength, k, qArr, j, eoe, edgeWeight, workVorticity) + !$omp private(cell1, cell2, invLength, k, qArr, j, eoe, edgeWeight, workVorticity) do iEdge = 1, nEdges cell1 = cellsOnEdge(1,iEdge) cell2 = cellsOnEdge(2,iEdge) diff --git a/src/core_ocean/shared/mpas_ocn_vel_hmix_del4.F b/src/core_ocean/shared/mpas_ocn_vel_hmix_del4.F index 01f4e4400b..75dbc1e367 100644 --- a/src/core_ocean/shared/mpas_ocn_vel_hmix_del4.F +++ b/src/core_ocean/shared/mpas_ocn_vel_hmix_del4.F @@ -191,7 +191,7 @@ subroutine ocn_vel_hmix_del4_tend(meshPool, divergence, relativeVorticity, tend, !Compute delsq_u !$omp parallel !$omp do schedule(runtime) & - !$omp private(iEdge, cell1, cell2, vertex1, vertex2, invDcEdge, invDvEdge, k) + !$omp private(cell1, cell2, vertex1, vertex2, invDcEdge, invDvEdge, k) do iEdge = 1, nEdges delsq_u(:, iEdge) = 0.0_RKIND cell1 = cellsOnEdge(1,iEdge) @@ -216,7 +216,7 @@ subroutine ocn_vel_hmix_del4_tend(meshPool, divergence, relativeVorticity, tend, ! Compute delsq_relativeVorticity !$omp parallel - !$omp do schedule(runtime) private(iVertex, invAreaTri1, i, iEdge, k) + !$omp do schedule(runtime) private(invAreaTri1, i, iEdge, k) do iVertex = 1, nVertices delsq_relativeVorticity(:, iVertex) = 0.0_RKIND invAreaTri1 = 1.0_RKIND / areaTriangle(iVertex) @@ -235,7 +235,7 @@ subroutine ocn_vel_hmix_del4_tend(meshPool, divergence, relativeVorticity, tend, ! Compute delsq_divergence !$omp parallel - !$omp do schedule(runtime) private(iCell, invAreaCell1, i, iEdge, k) + !$omp do schedule(runtime) private(invAreaCell1, i, iEdge, k) do iCell = 1, nCells delsq_divergence(:, iCell) = 0.0_RKIND invAreaCell1 = 1.0_RKIND / areaCell(iCell) @@ -256,7 +256,7 @@ subroutine ocn_vel_hmix_del4_tend(meshPool, divergence, relativeVorticity, tend, ! as \nabla div(\nabla^2 u) + k \times \nabla ( k \cross curl(\nabla^2 u) ) !$omp parallel !$omp do schedule(runtime) & - !$omp private(iEdge, cell1, cell2, vertex1, vertex2, invDcEdge, invDvEdge, r_tmp, k, u_diffusion) + !$omp private(cell1, cell2, vertex1, vertex2, invDcEdge, invDvEdge, r_tmp, k, u_diffusion) do iEdge = 1, nEdges cell1 = cellsOnEdge(1,iEdge) cell2 = cellsOnEdge(2,iEdge) diff --git a/src/core_ocean/shared/mpas_ocn_vel_pressure_grad.F b/src/core_ocean/shared/mpas_ocn_vel_pressure_grad.F index 759686a91d..74b10c4da3 100644 --- a/src/core_ocean/shared/mpas_ocn_vel_pressure_grad.F +++ b/src/core_ocean/shared/mpas_ocn_vel_pressure_grad.F @@ -248,7 +248,7 @@ subroutine ocn_vel_pressure_grad_tend(meshPool, ssh, pressure, montgomeryPotenti allocate(JacobianDxDs(nVertLevels)) !$omp parallel - !$omp do schedule(runtime) private(iEdge, cell1, cell2, invdcEdge, k, pGrad) + !$omp do schedule(runtime) private(cell1, cell2, invdcEdge, k, pGrad) do iEdge=1,nEdges cell1 = cellsOnEdge(1,iEdge) cell2 = cellsOnEdge(2,iEdge) @@ -289,7 +289,7 @@ subroutine ocn_vel_pressure_grad_tend(meshPool, ssh, pressure, montgomeryPotenti !$omp parallel !$omp do schedule(runtime) & - !$omp private(iEdge, cell1, cell2, invdcEdge, kMax, T1, T2, S1, S2, k, alpha, beta, & + !$omp private(cell1, cell2, invdcEdge, kMax, T1, T2, S1, S2, k, alpha, beta, & !$omp pGrad, JacobianDxDs, JacobianTz, JacobianSz) do iEdge=1,nEdges cell1 = cellsOnEdge(1,iEdge) diff --git a/src/core_ocean/shared/mpas_ocn_vmix.F b/src/core_ocean/shared/mpas_ocn_vmix.F index 2bc55efa19..b5898de380 100644 --- a/src/core_ocean/shared/mpas_ocn_vmix.F +++ b/src/core_ocean/shared/mpas_ocn_vmix.F @@ -277,7 +277,7 @@ subroutine ocn_vel_vmix_tend_implicit_rayleigh(meshPool, dt, kineticEnergyCell, !$omp parallel !$omp do schedule(runtime) & - !$omp private(iEdge, N, cell1, cell2, edgeThicknessTotal, k, A, B, C, velTemp) + !$omp private(N, cell1, cell2, edgeThicknessTotal, k, A, B, C, velTemp) do iEdge = 1, nEdges N = maxLevelEdgeTop(iEdge) if (N .gt. 0) then @@ -430,7 +430,7 @@ subroutine ocn_vel_vmix_tend_implicit(meshPool, dt, kineticEnergyCell, vertViscT A(1)=0.0_RKIND !$omp parallel - !$omp do schedule(runtime) private(iEdge, N, cell1, cell2, A, B, C, velTemp) + !$omp do schedule(runtime) private(N, cell1, cell2, A, B, C, velTemp) do iEdge = 1, nEdges N = maxLevelEdgeTop(iEdge) if (N .gt. 0) then @@ -574,7 +574,7 @@ subroutine ocn_tracer_vmix_tend_implicit(meshPool, dt, vertDiffTopOfCell, layerT call mpas_timer_start('vmix tracers tend imp loop', .false.) !$omp parallel - !$omp do schedule(runtime) private(iCell, N, A, B, C, rhs, tracersTemp) + !$omp do schedule(runtime) private(N, A, B, C, rhs, tracersTemp) do iCell = 1, nCells ! Compute A(k), B(k), C(k) for tracers N = maxLevelCell(iCell) diff --git a/src/core_ocean/shared/mpas_ocn_wetting_drying.F b/src/core_ocean/shared/mpas_ocn_wetting_drying.F index c0df1d0c2e..809c7a5f39 100644 --- a/src/core_ocean/shared/mpas_ocn_wetting_drying.F +++ b/src/core_ocean/shared/mpas_ocn_wetting_drying.F @@ -286,7 +286,7 @@ subroutine ocn_prevent_drying_rk4(block, dt, rkSubstepWeight, config_zero_drying ! prevent drying from happening with selective wettingVelocity !$omp parallel - !$omp do schedule(runtime) private(iEdge, k) + !$omp do schedule(runtime) private(k) do iEdge = 1, nEdges do k = 1, maxLevelEdgeBot(iEdge) if (abs(normalTransportVelocity(k,iEdge) + wettingVelocity(k,iEdge)) < eps) then From b7f71aeb88d11628025345f77de94b512f0b8b7f Mon Sep 17 00:00:00 2001 From: Mark Petersen Date: Fri, 29 May 2020 07:14:13 -0600 Subject: [PATCH 06/16] Add rk4 thread test --- .../QU240/rk4_thread_test/config_1thread.xml | 33 +++++++++++++++++++ .../QU240/rk4_thread_test/config_2thread.xml | 33 +++++++++++++++++++ .../QU240/rk4_thread_test/config_driver.xml | 13 ++++++++ .../ocean/regression_suites/nightly.xml | 3 ++ 4 files changed, 82 insertions(+) create mode 100644 testing_and_setup/compass/ocean/global_ocean/QU240/rk4_thread_test/config_1thread.xml create mode 100644 testing_and_setup/compass/ocean/global_ocean/QU240/rk4_thread_test/config_2thread.xml create mode 100644 testing_and_setup/compass/ocean/global_ocean/QU240/rk4_thread_test/config_driver.xml diff --git a/testing_and_setup/compass/ocean/global_ocean/QU240/rk4_thread_test/config_1thread.xml b/testing_and_setup/compass/ocean/global_ocean/QU240/rk4_thread_test/config_1thread.xml new file mode 100644 index 0000000000..8e44ee32f2 --- /dev/null +++ b/testing_and_setup/compass/ocean/global_ocean/QU240/rk4_thread_test/config_1thread.xml @@ -0,0 +1,33 @@ + + + + + + + +