From 394bdb1b6a99ea272b904a982bfc63c1608801da Mon Sep 17 00:00:00 2001 From: "Stefan J. Wernli" Date: Tue, 23 Jun 2020 09:04:40 -0700 Subject: [PATCH 1/2] Merging Linux simulator update from dbwz8/simDbg --- src/Simulation/Native/codegen/codegen_fma.py | 4 ++-- src/Simulation/Native/src/external/avx/kernel1.hpp | 2 +- src/Simulation/Native/src/external/avx/kernel2.hpp | 2 +- src/Simulation/Native/src/external/avx/kernel3.hpp | 2 +- src/Simulation/Native/src/external/avx/kernel4.hpp | 2 +- src/Simulation/Native/src/external/avx/kernel5.hpp | 2 +- src/Simulation/Native/src/external/avx/kernel6.hpp | 2 +- src/Simulation/Native/src/external/avx/kernel7.hpp | 2 +- src/Simulation/Native/src/external/avx2/kernel1.hpp | 2 +- src/Simulation/Native/src/external/avx2/kernel2.hpp | 2 +- src/Simulation/Native/src/external/avx2/kernel3.hpp | 2 +- src/Simulation/Native/src/external/avx2/kernel4.hpp | 2 +- src/Simulation/Native/src/external/avx2/kernel5.hpp | 2 +- src/Simulation/Native/src/external/avx2/kernel6.hpp | 2 +- src/Simulation/Native/src/external/avx2/kernel7.hpp | 2 +- src/Simulation/Native/src/external/avx512/kernel1.hpp | 4 ++-- src/Simulation/Native/src/external/avx512/kernel2.hpp | 4 ++-- src/Simulation/Native/src/external/avx512/kernel3.hpp | 4 ++-- src/Simulation/Native/src/external/avx512/kernel4.hpp | 4 ++-- src/Simulation/Native/src/external/avx512/kernel5.hpp | 4 ++-- src/Simulation/Native/src/external/avx512/kernel6.hpp | 4 ++-- src/Simulation/Native/src/external/avx512/kernel7.hpp | 4 ++-- src/Simulation/Native/src/external/nointrin/kernel1.hpp | 2 +- src/Simulation/Native/src/external/nointrin/kernel2.hpp | 2 +- src/Simulation/Native/src/external/nointrin/kernel3.hpp | 2 +- src/Simulation/Native/src/external/nointrin/kernel4.hpp | 2 +- src/Simulation/Native/src/external/nointrin/kernel5.hpp | 2 +- src/Simulation/Native/src/external/nointrin/kernel6.hpp | 2 +- src/Simulation/Native/src/external/nointrin/kernel7.hpp | 2 +- 29 files changed, 37 insertions(+), 37 deletions(-) diff --git a/src/Simulation/Native/codegen/codegen_fma.py b/src/Simulation/Native/codegen/codegen_fma.py index d54638b81ba..769bc6d5bec 100644 --- a/src/Simulation/Native/codegen/codegen_fma.py +++ b/src/Simulation/Native/codegen/codegen_fma.py @@ -243,7 +243,7 @@ def generate_kernel(n, blocks, only_one_matrix, unroll_loops, avx_len): kernelarray.append("#ifndef _MSC_VER\n") kernelarray.append("\t"*indent + "if (ctrlmask == 0){\n") indent += 1 - kernelarray.append("\t"*indent + "#pragma omp parallel for collapse(LOOP_COLLAPSE"+str(n)+") schedule(static)\n" + "\t"*indent + "for (std::size_t i0 = 0; i0 < n; i0 += 2 * dsorted[0]){\n") + kernelarray.append("\t"*indent + "#pragma omp parallel for collapse(LOOP_COLLAPSE"+str(n)+") schedule(static) proc_bind(spread)\n" + "\t"*indent + "for (std::size_t i0 = 0; i0 < n; i0 += 2 * dsorted[0]){\n") indent = indent + 1 for i in range(1,nc+1): kernelarray.append("\t"*indent + "for (std::size_t i"+str(i)+" = 0; i"+str(i)+" < dsorted["+str(i-1) + "]; i"+str(i)+" += 2 * dsorted["+str(i)+"]){\n") @@ -323,7 +323,7 @@ def generate_kernel(n, blocks, only_one_matrix, unroll_loops, avx_len): kernelarray.append( ";\n") kernelarray.append("\n"); kernelarray.append(" if (ctrlmask == 0){\n") - kernelarray.append(" #pragma omp parallel for schedule(static)\n") + kernelarray.append(" #pragma omp parallel for schedule(static)\n"); kernelarray.append(" for (std::intptr_t i = 0; i < static_cast(n); ++i)\n") kernelarray.append(" if ((i & dmask) == zero)\n") kernelarray.append(" kernel_core(psi, i") diff --git a/src/Simulation/Native/src/external/avx/kernel1.hpp b/src/Simulation/Native/src/external/avx/kernel1.hpp index 2bdcb04aea3..198676259e4 100644 --- a/src/Simulation/Native/src/external/avx/kernel1.hpp +++ b/src/Simulation/Native/src/external/avx/kernel1.hpp @@ -49,7 +49,7 @@ void kernel(V& psi, unsigned id0, M const& matrix, std::size_t ctrlmask) #ifndef _MSC_VER if (ctrlmask == 0){ - #pragma omp parallel for collapse(LOOP_COLLAPSE1) schedule(static) + #pragma omp parallel for collapse(LOOP_COLLAPSE1) schedule(static) proc_bind(spread) for (std::size_t i0 = 0; i0 < n; i0 += 2 * dsorted[0]){ for (std::size_t i1 = 0; i1 < dsorted[0]; ++i1){ kernel_core(psi, i0 + i1, dsorted[0], mm, mmt); diff --git a/src/Simulation/Native/src/external/avx/kernel2.hpp b/src/Simulation/Native/src/external/avx/kernel2.hpp index 8012bf3638d..7d52dc39eec 100644 --- a/src/Simulation/Native/src/external/avx/kernel2.hpp +++ b/src/Simulation/Native/src/external/avx/kernel2.hpp @@ -63,7 +63,7 @@ void kernel(V& psi, unsigned id1, unsigned id0, M const& matrix, std::size_t ctr #ifndef _MSC_VER if (ctrlmask == 0){ - #pragma omp parallel for collapse(LOOP_COLLAPSE2) schedule(static) + #pragma omp parallel for collapse(LOOP_COLLAPSE2) schedule(static) proc_bind(spread) for (std::size_t i0 = 0; i0 < n; i0 += 2 * dsorted[0]){ for (std::size_t i1 = 0; i1 < dsorted[0]; i1 += 2 * dsorted[1]){ for (std::size_t i2 = 0; i2 < dsorted[1]; ++i2){ diff --git a/src/Simulation/Native/src/external/avx/kernel3.hpp b/src/Simulation/Native/src/external/avx/kernel3.hpp index bc7037c6004..58248d4742e 100644 --- a/src/Simulation/Native/src/external/avx/kernel3.hpp +++ b/src/Simulation/Native/src/external/avx/kernel3.hpp @@ -102,7 +102,7 @@ void kernel(V& psi, unsigned id2, unsigned id1, unsigned id0, M const& matrix, s #ifndef _MSC_VER if (ctrlmask == 0){ - #pragma omp parallel for collapse(LOOP_COLLAPSE3) schedule(static) + #pragma omp parallel for collapse(LOOP_COLLAPSE3) schedule(static) proc_bind(spread) for (std::size_t i0 = 0; i0 < n; i0 += 2 * dsorted[0]){ for (std::size_t i1 = 0; i1 < dsorted[0]; i1 += 2 * dsorted[1]){ for (std::size_t i2 = 0; i2 < dsorted[1]; i2 += 2 * dsorted[2]){ diff --git a/src/Simulation/Native/src/external/avx/kernel4.hpp b/src/Simulation/Native/src/external/avx/kernel4.hpp index e20a190af5c..7ddcd504404 100644 --- a/src/Simulation/Native/src/external/avx/kernel4.hpp +++ b/src/Simulation/Native/src/external/avx/kernel4.hpp @@ -227,7 +227,7 @@ void kernel(V& psi, unsigned id3, unsigned id2, unsigned id1, unsigned id0, M co #ifndef _MSC_VER if (ctrlmask == 0){ - #pragma omp parallel for collapse(LOOP_COLLAPSE4) schedule(static) + #pragma omp parallel for collapse(LOOP_COLLAPSE4) schedule(static) proc_bind(spread) for (std::size_t i0 = 0; i0 < n; i0 += 2 * dsorted[0]){ for (std::size_t i1 = 0; i1 < dsorted[0]; i1 += 2 * dsorted[1]){ for (std::size_t i2 = 0; i2 < dsorted[1]; i2 += 2 * dsorted[2]){ diff --git a/src/Simulation/Native/src/external/avx/kernel5.hpp b/src/Simulation/Native/src/external/avx/kernel5.hpp index 8cf84656e68..72078dd6fd4 100644 --- a/src/Simulation/Native/src/external/avx/kernel5.hpp +++ b/src/Simulation/Native/src/external/avx/kernel5.hpp @@ -380,7 +380,7 @@ void kernel(V& psi, unsigned id4, unsigned id3, unsigned id2, unsigned id1, unsi #ifndef _MSC_VER if (ctrlmask == 0){ - #pragma omp parallel for collapse(LOOP_COLLAPSE5) schedule(static) + #pragma omp parallel for collapse(LOOP_COLLAPSE5) schedule(static) proc_bind(spread) for (std::size_t i0 = 0; i0 < n; i0 += 2 * dsorted[0]){ for (std::size_t i1 = 0; i1 < dsorted[0]; i1 += 2 * dsorted[1]){ for (std::size_t i2 = 0; i2 < dsorted[1]; i2 += 2 * dsorted[2]){ diff --git a/src/Simulation/Native/src/external/avx/kernel6.hpp b/src/Simulation/Native/src/external/avx/kernel6.hpp index 087c4e83473..89a4364b22c 100644 --- a/src/Simulation/Native/src/external/avx/kernel6.hpp +++ b/src/Simulation/Native/src/external/avx/kernel6.hpp @@ -212,7 +212,7 @@ void kernel(V& psi, unsigned id5, unsigned id4, unsigned id3, unsigned id2, unsi #ifndef _MSC_VER if (ctrlmask == 0){ - #pragma omp parallel for collapse(LOOP_COLLAPSE6) schedule(static) + #pragma omp parallel for collapse(LOOP_COLLAPSE6) schedule(static) proc_bind(spread) for (std::size_t i0 = 0; i0 < n; i0 += 2 * dsorted[0]){ for (std::size_t i1 = 0; i1 < dsorted[0]; i1 += 2 * dsorted[1]){ for (std::size_t i2 = 0; i2 < dsorted[1]; i2 += 2 * dsorted[2]){ diff --git a/src/Simulation/Native/src/external/avx/kernel7.hpp b/src/Simulation/Native/src/external/avx/kernel7.hpp index 197e800b208..8dfda9eee71 100644 --- a/src/Simulation/Native/src/external/avx/kernel7.hpp +++ b/src/Simulation/Native/src/external/avx/kernel7.hpp @@ -389,7 +389,7 @@ void kernel(V& psi, unsigned id6, unsigned id5, unsigned id4, unsigned id3, unsi #ifndef _MSC_VER if (ctrlmask == 0){ - #pragma omp parallel for collapse(LOOP_COLLAPSE7) schedule(static) + #pragma omp parallel for collapse(LOOP_COLLAPSE7) schedule(static) proc_bind(spread) for (std::size_t i0 = 0; i0 < n; i0 += 2 * dsorted[0]){ for (std::size_t i1 = 0; i1 < dsorted[0]; i1 += 2 * dsorted[1]){ for (std::size_t i2 = 0; i2 < dsorted[1]; i2 += 2 * dsorted[2]){ diff --git a/src/Simulation/Native/src/external/avx2/kernel1.hpp b/src/Simulation/Native/src/external/avx2/kernel1.hpp index 2bdcb04aea3..198676259e4 100644 --- a/src/Simulation/Native/src/external/avx2/kernel1.hpp +++ b/src/Simulation/Native/src/external/avx2/kernel1.hpp @@ -49,7 +49,7 @@ void kernel(V& psi, unsigned id0, M const& matrix, std::size_t ctrlmask) #ifndef _MSC_VER if (ctrlmask == 0){ - #pragma omp parallel for collapse(LOOP_COLLAPSE1) schedule(static) + #pragma omp parallel for collapse(LOOP_COLLAPSE1) schedule(static) proc_bind(spread) for (std::size_t i0 = 0; i0 < n; i0 += 2 * dsorted[0]){ for (std::size_t i1 = 0; i1 < dsorted[0]; ++i1){ kernel_core(psi, i0 + i1, dsorted[0], mm, mmt); diff --git a/src/Simulation/Native/src/external/avx2/kernel2.hpp b/src/Simulation/Native/src/external/avx2/kernel2.hpp index 8012bf3638d..7d52dc39eec 100644 --- a/src/Simulation/Native/src/external/avx2/kernel2.hpp +++ b/src/Simulation/Native/src/external/avx2/kernel2.hpp @@ -63,7 +63,7 @@ void kernel(V& psi, unsigned id1, unsigned id0, M const& matrix, std::size_t ctr #ifndef _MSC_VER if (ctrlmask == 0){ - #pragma omp parallel for collapse(LOOP_COLLAPSE2) schedule(static) + #pragma omp parallel for collapse(LOOP_COLLAPSE2) schedule(static) proc_bind(spread) for (std::size_t i0 = 0; i0 < n; i0 += 2 * dsorted[0]){ for (std::size_t i1 = 0; i1 < dsorted[0]; i1 += 2 * dsorted[1]){ for (std::size_t i2 = 0; i2 < dsorted[1]; ++i2){ diff --git a/src/Simulation/Native/src/external/avx2/kernel3.hpp b/src/Simulation/Native/src/external/avx2/kernel3.hpp index bc7037c6004..58248d4742e 100644 --- a/src/Simulation/Native/src/external/avx2/kernel3.hpp +++ b/src/Simulation/Native/src/external/avx2/kernel3.hpp @@ -102,7 +102,7 @@ void kernel(V& psi, unsigned id2, unsigned id1, unsigned id0, M const& matrix, s #ifndef _MSC_VER if (ctrlmask == 0){ - #pragma omp parallel for collapse(LOOP_COLLAPSE3) schedule(static) + #pragma omp parallel for collapse(LOOP_COLLAPSE3) schedule(static) proc_bind(spread) for (std::size_t i0 = 0; i0 < n; i0 += 2 * dsorted[0]){ for (std::size_t i1 = 0; i1 < dsorted[0]; i1 += 2 * dsorted[1]){ for (std::size_t i2 = 0; i2 < dsorted[1]; i2 += 2 * dsorted[2]){ diff --git a/src/Simulation/Native/src/external/avx2/kernel4.hpp b/src/Simulation/Native/src/external/avx2/kernel4.hpp index e20a190af5c..7ddcd504404 100644 --- a/src/Simulation/Native/src/external/avx2/kernel4.hpp +++ b/src/Simulation/Native/src/external/avx2/kernel4.hpp @@ -227,7 +227,7 @@ void kernel(V& psi, unsigned id3, unsigned id2, unsigned id1, unsigned id0, M co #ifndef _MSC_VER if (ctrlmask == 0){ - #pragma omp parallel for collapse(LOOP_COLLAPSE4) schedule(static) + #pragma omp parallel for collapse(LOOP_COLLAPSE4) schedule(static) proc_bind(spread) for (std::size_t i0 = 0; i0 < n; i0 += 2 * dsorted[0]){ for (std::size_t i1 = 0; i1 < dsorted[0]; i1 += 2 * dsorted[1]){ for (std::size_t i2 = 0; i2 < dsorted[1]; i2 += 2 * dsorted[2]){ diff --git a/src/Simulation/Native/src/external/avx2/kernel5.hpp b/src/Simulation/Native/src/external/avx2/kernel5.hpp index 8cf84656e68..72078dd6fd4 100644 --- a/src/Simulation/Native/src/external/avx2/kernel5.hpp +++ b/src/Simulation/Native/src/external/avx2/kernel5.hpp @@ -380,7 +380,7 @@ void kernel(V& psi, unsigned id4, unsigned id3, unsigned id2, unsigned id1, unsi #ifndef _MSC_VER if (ctrlmask == 0){ - #pragma omp parallel for collapse(LOOP_COLLAPSE5) schedule(static) + #pragma omp parallel for collapse(LOOP_COLLAPSE5) schedule(static) proc_bind(spread) for (std::size_t i0 = 0; i0 < n; i0 += 2 * dsorted[0]){ for (std::size_t i1 = 0; i1 < dsorted[0]; i1 += 2 * dsorted[1]){ for (std::size_t i2 = 0; i2 < dsorted[1]; i2 += 2 * dsorted[2]){ diff --git a/src/Simulation/Native/src/external/avx2/kernel6.hpp b/src/Simulation/Native/src/external/avx2/kernel6.hpp index 087c4e83473..89a4364b22c 100644 --- a/src/Simulation/Native/src/external/avx2/kernel6.hpp +++ b/src/Simulation/Native/src/external/avx2/kernel6.hpp @@ -212,7 +212,7 @@ void kernel(V& psi, unsigned id5, unsigned id4, unsigned id3, unsigned id2, unsi #ifndef _MSC_VER if (ctrlmask == 0){ - #pragma omp parallel for collapse(LOOP_COLLAPSE6) schedule(static) + #pragma omp parallel for collapse(LOOP_COLLAPSE6) schedule(static) proc_bind(spread) for (std::size_t i0 = 0; i0 < n; i0 += 2 * dsorted[0]){ for (std::size_t i1 = 0; i1 < dsorted[0]; i1 += 2 * dsorted[1]){ for (std::size_t i2 = 0; i2 < dsorted[1]; i2 += 2 * dsorted[2]){ diff --git a/src/Simulation/Native/src/external/avx2/kernel7.hpp b/src/Simulation/Native/src/external/avx2/kernel7.hpp index 197e800b208..8dfda9eee71 100644 --- a/src/Simulation/Native/src/external/avx2/kernel7.hpp +++ b/src/Simulation/Native/src/external/avx2/kernel7.hpp @@ -389,7 +389,7 @@ void kernel(V& psi, unsigned id6, unsigned id5, unsigned id4, unsigned id3, unsi #ifndef _MSC_VER if (ctrlmask == 0){ - #pragma omp parallel for collapse(LOOP_COLLAPSE7) schedule(static) + #pragma omp parallel for collapse(LOOP_COLLAPSE7) schedule(static) proc_bind(spread) for (std::size_t i0 = 0; i0 < n; i0 += 2 * dsorted[0]){ for (std::size_t i1 = 0; i1 < dsorted[0]; i1 += 2 * dsorted[1]){ for (std::size_t i2 = 0; i2 < dsorted[1]; i2 += 2 * dsorted[2]){ diff --git a/src/Simulation/Native/src/external/avx512/kernel1.hpp b/src/Simulation/Native/src/external/avx512/kernel1.hpp index 11f839e5025..19f2c473370 100644 --- a/src/Simulation/Native/src/external/avx512/kernel1.hpp +++ b/src/Simulation/Native/src/external/avx512/kernel1.hpp @@ -1,4 +1,4 @@ -// (C) 2018 ETH Zurich, ITP, Thomas H�ner and Damian Steiger +// (C) 2018 ETH Zurich, ITP, Thomas Häner and Damian Steiger template inline void kernel_core(V& psi, std::size_t I, std::size_t d0, M const& m, M const& mt) @@ -49,7 +49,7 @@ void kernel(V& psi, unsigned id0, M const& matrix, std::size_t ctrlmask) #ifndef _MSC_VER if (ctrlmask == 0){ - #pragma omp parallel for collapse(LOOP_COLLAPSE1) schedule(static) + #pragma omp parallel for collapse(LOOP_COLLAPSE1) schedule(static) proc_bind(spread) for (std::size_t i0 = 0; i0 < n; i0 += 2 * dsorted[0]){ for (std::size_t i1 = 0; i1 < dsorted[0]; ++i1){ kernel_core(psi, i0 + i1, dsorted[0], mm, mmt); diff --git a/src/Simulation/Native/src/external/avx512/kernel2.hpp b/src/Simulation/Native/src/external/avx512/kernel2.hpp index 9153a865dd7..9a47f3044fb 100644 --- a/src/Simulation/Native/src/external/avx512/kernel2.hpp +++ b/src/Simulation/Native/src/external/avx512/kernel2.hpp @@ -1,4 +1,4 @@ -// (C) 2018 ETH Zurich, ITP, Thomas H�ner and Damian Steiger +// (C) 2018 ETH Zurich, ITP, Thomas Häner and Damian Steiger template inline void kernel_core(V& psi, std::size_t I, std::size_t d0, std::size_t d1, M const& m, M const& mt) @@ -58,7 +58,7 @@ void kernel(V& psi, unsigned id1, unsigned id0, M const& matrix, std::size_t ctr #ifndef _MSC_VER if (ctrlmask == 0){ - #pragma omp parallel for collapse(LOOP_COLLAPSE2) schedule(static) + #pragma omp parallel for collapse(LOOP_COLLAPSE2) schedule(static) proc_bind(spread) for (std::size_t i0 = 0; i0 < n; i0 += 2 * dsorted[0]){ for (std::size_t i1 = 0; i1 < dsorted[0]; i1 += 2 * dsorted[1]){ for (std::size_t i2 = 0; i2 < dsorted[1]; ++i2){ diff --git a/src/Simulation/Native/src/external/avx512/kernel3.hpp b/src/Simulation/Native/src/external/avx512/kernel3.hpp index e4db4808042..a0f27741672 100644 --- a/src/Simulation/Native/src/external/avx512/kernel3.hpp +++ b/src/Simulation/Native/src/external/avx512/kernel3.hpp @@ -1,4 +1,4 @@ -// (C) 2018 ETH Zurich, ITP, Thomas H�ner and Damian Steiger +// (C) 2018 ETH Zurich, ITP, Thomas Häner and Damian Steiger template inline void kernel_core(V& psi, std::size_t I, std::size_t d0, std::size_t d1, std::size_t d2, M const& m, M const& mt) @@ -84,7 +84,7 @@ void kernel(V& psi, unsigned id2, unsigned id1, unsigned id0, M const& matrix, s #ifndef _MSC_VER if (ctrlmask == 0){ - #pragma omp parallel for collapse(LOOP_COLLAPSE3) schedule(static) + #pragma omp parallel for collapse(LOOP_COLLAPSE3) schedule(static) proc_bind(spread) for (std::size_t i0 = 0; i0 < n; i0 += 2 * dsorted[0]){ for (std::size_t i1 = 0; i1 < dsorted[0]; i1 += 2 * dsorted[1]){ for (std::size_t i2 = 0; i2 < dsorted[1]; i2 += 2 * dsorted[2]){ diff --git a/src/Simulation/Native/src/external/avx512/kernel4.hpp b/src/Simulation/Native/src/external/avx512/kernel4.hpp index 16bfc1ff86e..e956661a996 100644 --- a/src/Simulation/Native/src/external/avx512/kernel4.hpp +++ b/src/Simulation/Native/src/external/avx512/kernel4.hpp @@ -1,4 +1,4 @@ -// (C) 2018 ETH Zurich, ITP, Thomas H�ner and Damian Steiger +// (C) 2018 ETH Zurich, ITP, Thomas Häner and Damian Steiger template inline void kernel_core(V& psi, std::size_t I, std::size_t d0, std::size_t d1, std::size_t d2, std::size_t d3, M const& m, M const& mt) @@ -159,7 +159,7 @@ void kernel(V& psi, unsigned id3, unsigned id2, unsigned id1, unsigned id0, M co #ifndef _MSC_VER if (ctrlmask == 0){ - #pragma omp parallel for collapse(LOOP_COLLAPSE4) schedule(static) + #pragma omp parallel for collapse(LOOP_COLLAPSE4) schedule(static) proc_bind(spread) for (std::size_t i0 = 0; i0 < n; i0 += 2 * dsorted[0]){ for (std::size_t i1 = 0; i1 < dsorted[0]; i1 += 2 * dsorted[1]){ for (std::size_t i2 = 0; i2 < dsorted[1]; i2 += 2 * dsorted[2]){ diff --git a/src/Simulation/Native/src/external/avx512/kernel5.hpp b/src/Simulation/Native/src/external/avx512/kernel5.hpp index 6d1a030edda..ec1cdb918e6 100644 --- a/src/Simulation/Native/src/external/avx512/kernel5.hpp +++ b/src/Simulation/Native/src/external/avx512/kernel5.hpp @@ -1,4 +1,4 @@ -// (C) 2018 ETH Zurich, ITP, Thomas H�ner and Damian Steiger +// (C) 2018 ETH Zurich, ITP, Thomas Häner and Damian Steiger template inline void kernel_core(V& psi, std::size_t I, std::size_t d0, std::size_t d1, std::size_t d2, std::size_t d3, std::size_t d4, M const& m, M const& mt) @@ -244,7 +244,7 @@ void kernel(V& psi, unsigned id4, unsigned id3, unsigned id2, unsigned id1, unsi #ifndef _MSC_VER if (ctrlmask == 0){ - #pragma omp parallel for collapse(LOOP_COLLAPSE5) schedule(static) + #pragma omp parallel for collapse(LOOP_COLLAPSE5) schedule(static) proc_bind(spread) for (std::size_t i0 = 0; i0 < n; i0 += 2 * dsorted[0]){ for (std::size_t i1 = 0; i1 < dsorted[0]; i1 += 2 * dsorted[1]){ for (std::size_t i2 = 0; i2 < dsorted[1]; i2 += 2 * dsorted[2]){ diff --git a/src/Simulation/Native/src/external/avx512/kernel6.hpp b/src/Simulation/Native/src/external/avx512/kernel6.hpp index 26bbcb6240d..77a6a89465e 100644 --- a/src/Simulation/Native/src/external/avx512/kernel6.hpp +++ b/src/Simulation/Native/src/external/avx512/kernel6.hpp @@ -1,4 +1,4 @@ -// (C) 2018 ETH Zurich, ITP, Thomas H�ner and Damian Steiger +// (C) 2018 ETH Zurich, ITP, Thomas Häner and Damian Steiger template inline void kernel_core(V& psi, std::size_t I, std::size_t d0, std::size_t d1, std::size_t d2, std::size_t d3, std::size_t d4, std::size_t d5, M const& m) @@ -196,7 +196,7 @@ void kernel(V& psi, unsigned id5, unsigned id4, unsigned id3, unsigned id2, unsi #ifndef _MSC_VER if (ctrlmask == 0){ - #pragma omp parallel for collapse(LOOP_COLLAPSE6) schedule(static) + #pragma omp parallel for collapse(LOOP_COLLAPSE6) schedule(static) proc_bind(spread) for (std::size_t i0 = 0; i0 < n; i0 += 2 * dsorted[0]){ for (std::size_t i1 = 0; i1 < dsorted[0]; i1 += 2 * dsorted[1]){ for (std::size_t i2 = 0; i2 < dsorted[1]; i2 += 2 * dsorted[2]){ diff --git a/src/Simulation/Native/src/external/avx512/kernel7.hpp b/src/Simulation/Native/src/external/avx512/kernel7.hpp index 8f80d7c7d34..8e60b76cff2 100644 --- a/src/Simulation/Native/src/external/avx512/kernel7.hpp +++ b/src/Simulation/Native/src/external/avx512/kernel7.hpp @@ -1,4 +1,4 @@ -// (C) 2018 ETH Zurich, ITP, Thomas H�ner and Damian Steiger +// (C) 2018 ETH Zurich, ITP, Thomas Häner and Damian Steiger template inline void kernel_core(V& psi, std::size_t I, std::size_t d0, std::size_t d1, std::size_t d2, std::size_t d3, std::size_t d4, std::size_t d5, std::size_t d6, M const& m) @@ -357,7 +357,7 @@ void kernel(V& psi, unsigned id6, unsigned id5, unsigned id4, unsigned id3, unsi #ifndef _MSC_VER if (ctrlmask == 0){ - #pragma omp parallel for collapse(LOOP_COLLAPSE7) schedule(static) + #pragma omp parallel for collapse(LOOP_COLLAPSE7) schedule(static) proc_bind(spread) for (std::size_t i0 = 0; i0 < n; i0 += 2 * dsorted[0]){ for (std::size_t i1 = 0; i1 < dsorted[0]; i1 += 2 * dsorted[1]){ for (std::size_t i2 = 0; i2 < dsorted[1]; i2 += 2 * dsorted[2]){ diff --git a/src/Simulation/Native/src/external/nointrin/kernel1.hpp b/src/Simulation/Native/src/external/nointrin/kernel1.hpp index 53840cd0904..015e7e9d227 100644 --- a/src/Simulation/Native/src/external/nointrin/kernel1.hpp +++ b/src/Simulation/Native/src/external/nointrin/kernel1.hpp @@ -43,7 +43,7 @@ void kernel(V& psi, unsigned id0, M const& matrix, std::size_t ctrlmask) #ifndef _MSC_VER if (ctrlmask == 0){ - #pragma omp parallel for collapse(LOOP_COLLAPSE1) schedule(static) + #pragma omp parallel for collapse(LOOP_COLLAPSE1) schedule(static) proc_bind(spread) for (std::size_t i0 = 0; i0 < n; i0 += 2 * dsorted[0]){ for (std::size_t i1 = 0; i1 < dsorted[0]; ++i1){ kernel_core(psi, i0 + i1, dsorted[0], mm); diff --git a/src/Simulation/Native/src/external/nointrin/kernel2.hpp b/src/Simulation/Native/src/external/nointrin/kernel2.hpp index 7ad58accb47..dcb47fe7f48 100644 --- a/src/Simulation/Native/src/external/nointrin/kernel2.hpp +++ b/src/Simulation/Native/src/external/nointrin/kernel2.hpp @@ -64,7 +64,7 @@ void kernel(V& psi, unsigned id1, unsigned id0, M const& matrix, std::size_t ctr #ifndef _MSC_VER if (ctrlmask == 0){ - #pragma omp parallel for collapse(LOOP_COLLAPSE2) schedule(static) + #pragma omp parallel for collapse(LOOP_COLLAPSE2) schedule(static) proc_bind(spread) for (std::size_t i0 = 0; i0 < n; i0 += 2 * dsorted[0]){ for (std::size_t i1 = 0; i1 < dsorted[0]; i1 += 2 * dsorted[1]){ for (std::size_t i2 = 0; i2 < dsorted[1]; ++i2){ diff --git a/src/Simulation/Native/src/external/nointrin/kernel3.hpp b/src/Simulation/Native/src/external/nointrin/kernel3.hpp index 6b714eb0df8..1019845187a 100644 --- a/src/Simulation/Native/src/external/nointrin/kernel3.hpp +++ b/src/Simulation/Native/src/external/nointrin/kernel3.hpp @@ -129,7 +129,7 @@ void kernel(V& psi, unsigned id2, unsigned id1, unsigned id0, M const& matrix, s #ifndef _MSC_VER if (ctrlmask == 0){ - #pragma omp parallel for collapse(LOOP_COLLAPSE3) schedule(static) + #pragma omp parallel for collapse(LOOP_COLLAPSE3) schedule(static) proc_bind(spread) for (std::size_t i0 = 0; i0 < n; i0 += 2 * dsorted[0]){ for (std::size_t i1 = 0; i1 < dsorted[0]; i1 += 2 * dsorted[1]){ for (std::size_t i2 = 0; i2 < dsorted[1]; i2 += 2 * dsorted[2]){ diff --git a/src/Simulation/Native/src/external/nointrin/kernel4.hpp b/src/Simulation/Native/src/external/nointrin/kernel4.hpp index f8b38ec3a92..46d33620e74 100644 --- a/src/Simulation/Native/src/external/nointrin/kernel4.hpp +++ b/src/Simulation/Native/src/external/nointrin/kernel4.hpp @@ -354,7 +354,7 @@ void kernel(V& psi, unsigned id3, unsigned id2, unsigned id1, unsigned id0, M co #ifndef _MSC_VER if (ctrlmask == 0){ - #pragma omp parallel for collapse(LOOP_COLLAPSE4) schedule(static) + #pragma omp parallel for collapse(LOOP_COLLAPSE4) schedule(static) proc_bind(spread) for (std::size_t i0 = 0; i0 < n; i0 += 2 * dsorted[0]){ for (std::size_t i1 = 0; i1 < dsorted[0]; i1 += 2 * dsorted[1]){ for (std::size_t i2 = 0; i2 < dsorted[1]; i2 += 2 * dsorted[2]){ diff --git a/src/Simulation/Native/src/external/nointrin/kernel5.hpp b/src/Simulation/Native/src/external/nointrin/kernel5.hpp index 59094e29042..08657104779 100644 --- a/src/Simulation/Native/src/external/nointrin/kernel5.hpp +++ b/src/Simulation/Native/src/external/nointrin/kernel5.hpp @@ -643,7 +643,7 @@ void kernel(V& psi, unsigned id4, unsigned id3, unsigned id2, unsigned id1, unsi #ifndef _MSC_VER if (ctrlmask == 0){ - #pragma omp parallel for collapse(LOOP_COLLAPSE5) schedule(static) + #pragma omp parallel for collapse(LOOP_COLLAPSE5) schedule(static) proc_bind(spread) for (std::size_t i0 = 0; i0 < n; i0 += 2 * dsorted[0]){ for (std::size_t i1 = 0; i1 < dsorted[0]; i1 += 2 * dsorted[1]){ for (std::size_t i2 = 0; i2 < dsorted[1]; i2 += 2 * dsorted[2]){ diff --git a/src/Simulation/Native/src/external/nointrin/kernel6.hpp b/src/Simulation/Native/src/external/nointrin/kernel6.hpp index 0fe55f34878..7f8ea4741a3 100644 --- a/src/Simulation/Native/src/external/nointrin/kernel6.hpp +++ b/src/Simulation/Native/src/external/nointrin/kernel6.hpp @@ -244,7 +244,7 @@ void kernel(V& psi, unsigned id5, unsigned id4, unsigned id3, unsigned id2, unsi #ifndef _MSC_VER if (ctrlmask == 0){ - #pragma omp parallel for collapse(LOOP_COLLAPSE6) schedule(static) + #pragma omp parallel for collapse(LOOP_COLLAPSE6) schedule(static) proc_bind(spread) for (std::size_t i0 = 0; i0 < n; i0 += 2 * dsorted[0]){ for (std::size_t i1 = 0; i1 < dsorted[0]; i1 += 2 * dsorted[1]){ for (std::size_t i2 = 0; i2 < dsorted[1]; i2 += 2 * dsorted[2]){ diff --git a/src/Simulation/Native/src/external/nointrin/kernel7.hpp b/src/Simulation/Native/src/external/nointrin/kernel7.hpp index 2a3809b134f..fc8401da66f 100644 --- a/src/Simulation/Native/src/external/nointrin/kernel7.hpp +++ b/src/Simulation/Native/src/external/nointrin/kernel7.hpp @@ -453,7 +453,7 @@ void kernel(V& psi, unsigned id6, unsigned id5, unsigned id4, unsigned id3, unsi #ifndef _MSC_VER if (ctrlmask == 0){ - #pragma omp parallel for collapse(LOOP_COLLAPSE7) schedule(static) + #pragma omp parallel for collapse(LOOP_COLLAPSE7) schedule(static) proc_bind(spread) for (std::size_t i0 = 0; i0 < n; i0 += 2 * dsorted[0]){ for (std::size_t i1 = 0; i1 < dsorted[0]; i1 += 2 * dsorted[1]){ for (std::size_t i2 = 0; i2 < dsorted[1]; i2 += 2 * dsorted[2]){ From 2f1c62b6b3c307587854437ba38254f799fefc7b Mon Sep 17 00:00:00 2001 From: "Stefan J. Wernli" Date: Tue, 23 Jun 2020 09:17:21 -0700 Subject: [PATCH 2/2] Remove extra semicolon --- src/Simulation/Native/codegen/codegen_fma.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Simulation/Native/codegen/codegen_fma.py b/src/Simulation/Native/codegen/codegen_fma.py index 769bc6d5bec..6e523aa1498 100644 --- a/src/Simulation/Native/codegen/codegen_fma.py +++ b/src/Simulation/Native/codegen/codegen_fma.py @@ -323,7 +323,7 @@ def generate_kernel(n, blocks, only_one_matrix, unroll_loops, avx_len): kernelarray.append( ";\n") kernelarray.append("\n"); kernelarray.append(" if (ctrlmask == 0){\n") - kernelarray.append(" #pragma omp parallel for schedule(static)\n"); + kernelarray.append(" #pragma omp parallel for schedule(static)\n") kernelarray.append(" for (std::intptr_t i = 0; i < static_cast(n); ++i)\n") kernelarray.append(" if ((i & dmask) == zero)\n") kernelarray.append(" kernel_core(psi, i")