diff --git a/src/Simulation/Native/codegen/codegen_fma.py b/src/Simulation/Native/codegen/codegen_fma.py index d54638b81ba..6e523aa1498 100644 --- a/src/Simulation/Native/codegen/codegen_fma.py +++ b/src/Simulation/Native/codegen/codegen_fma.py @@ -243,7 +243,7 @@ def generate_kernel(n, blocks, only_one_matrix, unroll_loops, avx_len): kernelarray.append("#ifndef _MSC_VER\n") kernelarray.append("\t"*indent + "if (ctrlmask == 0){\n") indent += 1 - kernelarray.append("\t"*indent + "#pragma omp parallel for collapse(LOOP_COLLAPSE"+str(n)+") schedule(static)\n" + "\t"*indent + "for (std::size_t i0 = 0; i0 < n; i0 += 2 * dsorted[0]){\n") + kernelarray.append("\t"*indent + "#pragma omp parallel for collapse(LOOP_COLLAPSE"+str(n)+") schedule(static) proc_bind(spread)\n" + "\t"*indent + "for (std::size_t i0 = 0; i0 < n; i0 += 2 * dsorted[0]){\n") indent = indent + 1 for i in range(1,nc+1): kernelarray.append("\t"*indent + "for (std::size_t i"+str(i)+" = 0; i"+str(i)+" < dsorted["+str(i-1) + "]; i"+str(i)+" += 2 * dsorted["+str(i)+"]){\n") diff --git a/src/Simulation/Native/src/external/avx/kernel1.hpp b/src/Simulation/Native/src/external/avx/kernel1.hpp index 2bdcb04aea3..198676259e4 100644 --- a/src/Simulation/Native/src/external/avx/kernel1.hpp +++ b/src/Simulation/Native/src/external/avx/kernel1.hpp @@ -49,7 +49,7 @@ void kernel(V& psi, unsigned id0, M const& matrix, std::size_t ctrlmask) #ifndef _MSC_VER if (ctrlmask == 0){ - #pragma omp parallel for collapse(LOOP_COLLAPSE1) schedule(static) + #pragma omp parallel for collapse(LOOP_COLLAPSE1) schedule(static) proc_bind(spread) for (std::size_t i0 = 0; i0 < n; i0 += 2 * dsorted[0]){ for (std::size_t i1 = 0; i1 < dsorted[0]; ++i1){ kernel_core(psi, i0 + i1, dsorted[0], mm, mmt); diff --git a/src/Simulation/Native/src/external/avx/kernel2.hpp b/src/Simulation/Native/src/external/avx/kernel2.hpp index 8012bf3638d..7d52dc39eec 100644 --- a/src/Simulation/Native/src/external/avx/kernel2.hpp +++ b/src/Simulation/Native/src/external/avx/kernel2.hpp @@ -63,7 +63,7 @@ void kernel(V& psi, unsigned id1, unsigned id0, M const& matrix, std::size_t ctr #ifndef _MSC_VER if (ctrlmask == 0){ - #pragma omp parallel for collapse(LOOP_COLLAPSE2) schedule(static) + #pragma omp parallel for collapse(LOOP_COLLAPSE2) schedule(static) proc_bind(spread) for (std::size_t i0 = 0; i0 < n; i0 += 2 * dsorted[0]){ for (std::size_t i1 = 0; i1 < dsorted[0]; i1 += 2 * dsorted[1]){ for (std::size_t i2 = 0; i2 < dsorted[1]; ++i2){ diff --git a/src/Simulation/Native/src/external/avx/kernel3.hpp b/src/Simulation/Native/src/external/avx/kernel3.hpp index bc7037c6004..58248d4742e 100644 --- a/src/Simulation/Native/src/external/avx/kernel3.hpp +++ b/src/Simulation/Native/src/external/avx/kernel3.hpp @@ -102,7 +102,7 @@ void kernel(V& psi, unsigned id2, unsigned id1, unsigned id0, M const& matrix, s #ifndef _MSC_VER if (ctrlmask == 0){ - #pragma omp parallel for collapse(LOOP_COLLAPSE3) schedule(static) + #pragma omp parallel for collapse(LOOP_COLLAPSE3) schedule(static) proc_bind(spread) for (std::size_t i0 = 0; i0 < n; i0 += 2 * dsorted[0]){ for (std::size_t i1 = 0; i1 < dsorted[0]; i1 += 2 * dsorted[1]){ for (std::size_t i2 = 0; i2 < dsorted[1]; i2 += 2 * dsorted[2]){ diff --git a/src/Simulation/Native/src/external/avx/kernel4.hpp b/src/Simulation/Native/src/external/avx/kernel4.hpp index e20a190af5c..7ddcd504404 100644 --- a/src/Simulation/Native/src/external/avx/kernel4.hpp +++ b/src/Simulation/Native/src/external/avx/kernel4.hpp @@ -227,7 +227,7 @@ void kernel(V& psi, unsigned id3, unsigned id2, unsigned id1, unsigned id0, M co #ifndef _MSC_VER if (ctrlmask == 0){ - #pragma omp parallel for collapse(LOOP_COLLAPSE4) schedule(static) + #pragma omp parallel for collapse(LOOP_COLLAPSE4) schedule(static) proc_bind(spread) for (std::size_t i0 = 0; i0 < n; i0 += 2 * dsorted[0]){ for (std::size_t i1 = 0; i1 < dsorted[0]; i1 += 2 * dsorted[1]){ for (std::size_t i2 = 0; i2 < dsorted[1]; i2 += 2 * dsorted[2]){ diff --git a/src/Simulation/Native/src/external/avx/kernel5.hpp b/src/Simulation/Native/src/external/avx/kernel5.hpp index 8cf84656e68..72078dd6fd4 100644 --- a/src/Simulation/Native/src/external/avx/kernel5.hpp +++ b/src/Simulation/Native/src/external/avx/kernel5.hpp @@ -380,7 +380,7 @@ void kernel(V& psi, unsigned id4, unsigned id3, unsigned id2, unsigned id1, unsi #ifndef _MSC_VER if (ctrlmask == 0){ - #pragma omp parallel for collapse(LOOP_COLLAPSE5) schedule(static) + #pragma omp parallel for collapse(LOOP_COLLAPSE5) schedule(static) proc_bind(spread) for (std::size_t i0 = 0; i0 < n; i0 += 2 * dsorted[0]){ for (std::size_t i1 = 0; i1 < dsorted[0]; i1 += 2 * dsorted[1]){ for (std::size_t i2 = 0; i2 < dsorted[1]; i2 += 2 * dsorted[2]){ diff --git a/src/Simulation/Native/src/external/avx/kernel6.hpp b/src/Simulation/Native/src/external/avx/kernel6.hpp index 087c4e83473..89a4364b22c 100644 --- a/src/Simulation/Native/src/external/avx/kernel6.hpp +++ b/src/Simulation/Native/src/external/avx/kernel6.hpp @@ -212,7 +212,7 @@ void kernel(V& psi, unsigned id5, unsigned id4, unsigned id3, unsigned id2, unsi #ifndef _MSC_VER if (ctrlmask == 0){ - #pragma omp parallel for collapse(LOOP_COLLAPSE6) schedule(static) + #pragma omp parallel for collapse(LOOP_COLLAPSE6) schedule(static) proc_bind(spread) for (std::size_t i0 = 0; i0 < n; i0 += 2 * dsorted[0]){ for (std::size_t i1 = 0; i1 < dsorted[0]; i1 += 2 * dsorted[1]){ for (std::size_t i2 = 0; i2 < dsorted[1]; i2 += 2 * dsorted[2]){ diff --git a/src/Simulation/Native/src/external/avx/kernel7.hpp b/src/Simulation/Native/src/external/avx/kernel7.hpp index 197e800b208..8dfda9eee71 100644 --- a/src/Simulation/Native/src/external/avx/kernel7.hpp +++ b/src/Simulation/Native/src/external/avx/kernel7.hpp @@ -389,7 +389,7 @@ void kernel(V& psi, unsigned id6, unsigned id5, unsigned id4, unsigned id3, unsi #ifndef _MSC_VER if (ctrlmask == 0){ - #pragma omp parallel for collapse(LOOP_COLLAPSE7) schedule(static) + #pragma omp parallel for collapse(LOOP_COLLAPSE7) schedule(static) proc_bind(spread) for (std::size_t i0 = 0; i0 < n; i0 += 2 * dsorted[0]){ for (std::size_t i1 = 0; i1 < dsorted[0]; i1 += 2 * dsorted[1]){ for (std::size_t i2 = 0; i2 < dsorted[1]; i2 += 2 * dsorted[2]){ diff --git a/src/Simulation/Native/src/external/avx2/kernel1.hpp b/src/Simulation/Native/src/external/avx2/kernel1.hpp index 2bdcb04aea3..198676259e4 100644 --- a/src/Simulation/Native/src/external/avx2/kernel1.hpp +++ b/src/Simulation/Native/src/external/avx2/kernel1.hpp @@ -49,7 +49,7 @@ void kernel(V& psi, unsigned id0, M const& matrix, std::size_t ctrlmask) #ifndef _MSC_VER if (ctrlmask == 0){ - #pragma omp parallel for collapse(LOOP_COLLAPSE1) schedule(static) + #pragma omp parallel for collapse(LOOP_COLLAPSE1) schedule(static) proc_bind(spread) for (std::size_t i0 = 0; i0 < n; i0 += 2 * dsorted[0]){ for (std::size_t i1 = 0; i1 < dsorted[0]; ++i1){ kernel_core(psi, i0 + i1, dsorted[0], mm, mmt); diff --git a/src/Simulation/Native/src/external/avx2/kernel2.hpp b/src/Simulation/Native/src/external/avx2/kernel2.hpp index 8012bf3638d..7d52dc39eec 100644 --- a/src/Simulation/Native/src/external/avx2/kernel2.hpp +++ b/src/Simulation/Native/src/external/avx2/kernel2.hpp @@ -63,7 +63,7 @@ void kernel(V& psi, unsigned id1, unsigned id0, M const& matrix, std::size_t ctr #ifndef _MSC_VER if (ctrlmask == 0){ - #pragma omp parallel for collapse(LOOP_COLLAPSE2) schedule(static) + #pragma omp parallel for collapse(LOOP_COLLAPSE2) schedule(static) proc_bind(spread) for (std::size_t i0 = 0; i0 < n; i0 += 2 * dsorted[0]){ for (std::size_t i1 = 0; i1 < dsorted[0]; i1 += 2 * dsorted[1]){ for (std::size_t i2 = 0; i2 < dsorted[1]; ++i2){ diff --git a/src/Simulation/Native/src/external/avx2/kernel3.hpp b/src/Simulation/Native/src/external/avx2/kernel3.hpp index bc7037c6004..58248d4742e 100644 --- a/src/Simulation/Native/src/external/avx2/kernel3.hpp +++ b/src/Simulation/Native/src/external/avx2/kernel3.hpp @@ -102,7 +102,7 @@ void kernel(V& psi, unsigned id2, unsigned id1, unsigned id0, M const& matrix, s #ifndef _MSC_VER if (ctrlmask == 0){ - #pragma omp parallel for collapse(LOOP_COLLAPSE3) schedule(static) + #pragma omp parallel for collapse(LOOP_COLLAPSE3) schedule(static) proc_bind(spread) for (std::size_t i0 = 0; i0 < n; i0 += 2 * dsorted[0]){ for (std::size_t i1 = 0; i1 < dsorted[0]; i1 += 2 * dsorted[1]){ for (std::size_t i2 = 0; i2 < dsorted[1]; i2 += 2 * dsorted[2]){ diff --git a/src/Simulation/Native/src/external/avx2/kernel4.hpp b/src/Simulation/Native/src/external/avx2/kernel4.hpp index e20a190af5c..7ddcd504404 100644 --- a/src/Simulation/Native/src/external/avx2/kernel4.hpp +++ b/src/Simulation/Native/src/external/avx2/kernel4.hpp @@ -227,7 +227,7 @@ void kernel(V& psi, unsigned id3, unsigned id2, unsigned id1, unsigned id0, M co #ifndef _MSC_VER if (ctrlmask == 0){ - #pragma omp parallel for collapse(LOOP_COLLAPSE4) schedule(static) + #pragma omp parallel for collapse(LOOP_COLLAPSE4) schedule(static) proc_bind(spread) for (std::size_t i0 = 0; i0 < n; i0 += 2 * dsorted[0]){ for (std::size_t i1 = 0; i1 < dsorted[0]; i1 += 2 * dsorted[1]){ for (std::size_t i2 = 0; i2 < dsorted[1]; i2 += 2 * dsorted[2]){ diff --git a/src/Simulation/Native/src/external/avx2/kernel5.hpp b/src/Simulation/Native/src/external/avx2/kernel5.hpp index 8cf84656e68..72078dd6fd4 100644 --- a/src/Simulation/Native/src/external/avx2/kernel5.hpp +++ b/src/Simulation/Native/src/external/avx2/kernel5.hpp @@ -380,7 +380,7 @@ void kernel(V& psi, unsigned id4, unsigned id3, unsigned id2, unsigned id1, unsi #ifndef _MSC_VER if (ctrlmask == 0){ - #pragma omp parallel for collapse(LOOP_COLLAPSE5) schedule(static) + #pragma omp parallel for collapse(LOOP_COLLAPSE5) schedule(static) proc_bind(spread) for (std::size_t i0 = 0; i0 < n; i0 += 2 * dsorted[0]){ for (std::size_t i1 = 0; i1 < dsorted[0]; i1 += 2 * dsorted[1]){ for (std::size_t i2 = 0; i2 < dsorted[1]; i2 += 2 * dsorted[2]){ diff --git a/src/Simulation/Native/src/external/avx2/kernel6.hpp b/src/Simulation/Native/src/external/avx2/kernel6.hpp index 087c4e83473..89a4364b22c 100644 --- a/src/Simulation/Native/src/external/avx2/kernel6.hpp +++ b/src/Simulation/Native/src/external/avx2/kernel6.hpp @@ -212,7 +212,7 @@ void kernel(V& psi, unsigned id5, unsigned id4, unsigned id3, unsigned id2, unsi #ifndef _MSC_VER if (ctrlmask == 0){ - #pragma omp parallel for collapse(LOOP_COLLAPSE6) schedule(static) + #pragma omp parallel for collapse(LOOP_COLLAPSE6) schedule(static) proc_bind(spread) for (std::size_t i0 = 0; i0 < n; i0 += 2 * dsorted[0]){ for (std::size_t i1 = 0; i1 < dsorted[0]; i1 += 2 * dsorted[1]){ for (std::size_t i2 = 0; i2 < dsorted[1]; i2 += 2 * dsorted[2]){ diff --git a/src/Simulation/Native/src/external/avx2/kernel7.hpp b/src/Simulation/Native/src/external/avx2/kernel7.hpp index 197e800b208..8dfda9eee71 100644 --- a/src/Simulation/Native/src/external/avx2/kernel7.hpp +++ b/src/Simulation/Native/src/external/avx2/kernel7.hpp @@ -389,7 +389,7 @@ void kernel(V& psi, unsigned id6, unsigned id5, unsigned id4, unsigned id3, unsi #ifndef _MSC_VER if (ctrlmask == 0){ - #pragma omp parallel for collapse(LOOP_COLLAPSE7) schedule(static) + #pragma omp parallel for collapse(LOOP_COLLAPSE7) schedule(static) proc_bind(spread) for (std::size_t i0 = 0; i0 < n; i0 += 2 * dsorted[0]){ for (std::size_t i1 = 0; i1 < dsorted[0]; i1 += 2 * dsorted[1]){ for (std::size_t i2 = 0; i2 < dsorted[1]; i2 += 2 * dsorted[2]){ diff --git a/src/Simulation/Native/src/external/avx512/kernel1.hpp b/src/Simulation/Native/src/external/avx512/kernel1.hpp index 11f839e5025..19f2c473370 100644 --- a/src/Simulation/Native/src/external/avx512/kernel1.hpp +++ b/src/Simulation/Native/src/external/avx512/kernel1.hpp @@ -1,4 +1,4 @@ -// (C) 2018 ETH Zurich, ITP, Thomas H�ner and Damian Steiger +// (C) 2018 ETH Zurich, ITP, Thomas Häner and Damian Steiger template inline void kernel_core(V& psi, std::size_t I, std::size_t d0, M const& m, M const& mt) @@ -49,7 +49,7 @@ void kernel(V& psi, unsigned id0, M const& matrix, std::size_t ctrlmask) #ifndef _MSC_VER if (ctrlmask == 0){ - #pragma omp parallel for collapse(LOOP_COLLAPSE1) schedule(static) + #pragma omp parallel for collapse(LOOP_COLLAPSE1) schedule(static) proc_bind(spread) for (std::size_t i0 = 0; i0 < n; i0 += 2 * dsorted[0]){ for (std::size_t i1 = 0; i1 < dsorted[0]; ++i1){ kernel_core(psi, i0 + i1, dsorted[0], mm, mmt); diff --git a/src/Simulation/Native/src/external/avx512/kernel2.hpp b/src/Simulation/Native/src/external/avx512/kernel2.hpp index 9153a865dd7..9a47f3044fb 100644 --- a/src/Simulation/Native/src/external/avx512/kernel2.hpp +++ b/src/Simulation/Native/src/external/avx512/kernel2.hpp @@ -1,4 +1,4 @@ -// (C) 2018 ETH Zurich, ITP, Thomas H�ner and Damian Steiger +// (C) 2018 ETH Zurich, ITP, Thomas Häner and Damian Steiger template inline void kernel_core(V& psi, std::size_t I, std::size_t d0, std::size_t d1, M const& m, M const& mt) @@ -58,7 +58,7 @@ void kernel(V& psi, unsigned id1, unsigned id0, M const& matrix, std::size_t ctr #ifndef _MSC_VER if (ctrlmask == 0){ - #pragma omp parallel for collapse(LOOP_COLLAPSE2) schedule(static) + #pragma omp parallel for collapse(LOOP_COLLAPSE2) schedule(static) proc_bind(spread) for (std::size_t i0 = 0; i0 < n; i0 += 2 * dsorted[0]){ for (std::size_t i1 = 0; i1 < dsorted[0]; i1 += 2 * dsorted[1]){ for (std::size_t i2 = 0; i2 < dsorted[1]; ++i2){ diff --git a/src/Simulation/Native/src/external/avx512/kernel3.hpp b/src/Simulation/Native/src/external/avx512/kernel3.hpp index e4db4808042..a0f27741672 100644 --- a/src/Simulation/Native/src/external/avx512/kernel3.hpp +++ b/src/Simulation/Native/src/external/avx512/kernel3.hpp @@ -1,4 +1,4 @@ -// (C) 2018 ETH Zurich, ITP, Thomas H�ner and Damian Steiger +// (C) 2018 ETH Zurich, ITP, Thomas Häner and Damian Steiger template inline void kernel_core(V& psi, std::size_t I, std::size_t d0, std::size_t d1, std::size_t d2, M const& m, M const& mt) @@ -84,7 +84,7 @@ void kernel(V& psi, unsigned id2, unsigned id1, unsigned id0, M const& matrix, s #ifndef _MSC_VER if (ctrlmask == 0){ - #pragma omp parallel for collapse(LOOP_COLLAPSE3) schedule(static) + #pragma omp parallel for collapse(LOOP_COLLAPSE3) schedule(static) proc_bind(spread) for (std::size_t i0 = 0; i0 < n; i0 += 2 * dsorted[0]){ for (std::size_t i1 = 0; i1 < dsorted[0]; i1 += 2 * dsorted[1]){ for (std::size_t i2 = 0; i2 < dsorted[1]; i2 += 2 * dsorted[2]){ diff --git a/src/Simulation/Native/src/external/avx512/kernel4.hpp b/src/Simulation/Native/src/external/avx512/kernel4.hpp index 16bfc1ff86e..e956661a996 100644 --- a/src/Simulation/Native/src/external/avx512/kernel4.hpp +++ b/src/Simulation/Native/src/external/avx512/kernel4.hpp @@ -1,4 +1,4 @@ -// (C) 2018 ETH Zurich, ITP, Thomas H�ner and Damian Steiger +// (C) 2018 ETH Zurich, ITP, Thomas Häner and Damian Steiger template inline void kernel_core(V& psi, std::size_t I, std::size_t d0, std::size_t d1, std::size_t d2, std::size_t d3, M const& m, M const& mt) @@ -159,7 +159,7 @@ void kernel(V& psi, unsigned id3, unsigned id2, unsigned id1, unsigned id0, M co #ifndef _MSC_VER if (ctrlmask == 0){ - #pragma omp parallel for collapse(LOOP_COLLAPSE4) schedule(static) + #pragma omp parallel for collapse(LOOP_COLLAPSE4) schedule(static) proc_bind(spread) for (std::size_t i0 = 0; i0 < n; i0 += 2 * dsorted[0]){ for (std::size_t i1 = 0; i1 < dsorted[0]; i1 += 2 * dsorted[1]){ for (std::size_t i2 = 0; i2 < dsorted[1]; i2 += 2 * dsorted[2]){ diff --git a/src/Simulation/Native/src/external/avx512/kernel5.hpp b/src/Simulation/Native/src/external/avx512/kernel5.hpp index 6d1a030edda..ec1cdb918e6 100644 --- a/src/Simulation/Native/src/external/avx512/kernel5.hpp +++ b/src/Simulation/Native/src/external/avx512/kernel5.hpp @@ -1,4 +1,4 @@ -// (C) 2018 ETH Zurich, ITP, Thomas H�ner and Damian Steiger +// (C) 2018 ETH Zurich, ITP, Thomas Häner and Damian Steiger template inline void kernel_core(V& psi, std::size_t I, std::size_t d0, std::size_t d1, std::size_t d2, std::size_t d3, std::size_t d4, M const& m, M const& mt) @@ -244,7 +244,7 @@ void kernel(V& psi, unsigned id4, unsigned id3, unsigned id2, unsigned id1, unsi #ifndef _MSC_VER if (ctrlmask == 0){ - #pragma omp parallel for collapse(LOOP_COLLAPSE5) schedule(static) + #pragma omp parallel for collapse(LOOP_COLLAPSE5) schedule(static) proc_bind(spread) for (std::size_t i0 = 0; i0 < n; i0 += 2 * dsorted[0]){ for (std::size_t i1 = 0; i1 < dsorted[0]; i1 += 2 * dsorted[1]){ for (std::size_t i2 = 0; i2 < dsorted[1]; i2 += 2 * dsorted[2]){ diff --git a/src/Simulation/Native/src/external/avx512/kernel6.hpp b/src/Simulation/Native/src/external/avx512/kernel6.hpp index 26bbcb6240d..77a6a89465e 100644 --- a/src/Simulation/Native/src/external/avx512/kernel6.hpp +++ b/src/Simulation/Native/src/external/avx512/kernel6.hpp @@ -1,4 +1,4 @@ -// (C) 2018 ETH Zurich, ITP, Thomas H�ner and Damian Steiger +// (C) 2018 ETH Zurich, ITP, Thomas Häner and Damian Steiger template inline void kernel_core(V& psi, std::size_t I, std::size_t d0, std::size_t d1, std::size_t d2, std::size_t d3, std::size_t d4, std::size_t d5, M const& m) @@ -196,7 +196,7 @@ void kernel(V& psi, unsigned id5, unsigned id4, unsigned id3, unsigned id2, unsi #ifndef _MSC_VER if (ctrlmask == 0){ - #pragma omp parallel for collapse(LOOP_COLLAPSE6) schedule(static) + #pragma omp parallel for collapse(LOOP_COLLAPSE6) schedule(static) proc_bind(spread) for (std::size_t i0 = 0; i0 < n; i0 += 2 * dsorted[0]){ for (std::size_t i1 = 0; i1 < dsorted[0]; i1 += 2 * dsorted[1]){ for (std::size_t i2 = 0; i2 < dsorted[1]; i2 += 2 * dsorted[2]){ diff --git a/src/Simulation/Native/src/external/avx512/kernel7.hpp b/src/Simulation/Native/src/external/avx512/kernel7.hpp index 8f80d7c7d34..8e60b76cff2 100644 --- a/src/Simulation/Native/src/external/avx512/kernel7.hpp +++ b/src/Simulation/Native/src/external/avx512/kernel7.hpp @@ -1,4 +1,4 @@ -// (C) 2018 ETH Zurich, ITP, Thomas H�ner and Damian Steiger +// (C) 2018 ETH Zurich, ITP, Thomas Häner and Damian Steiger template inline void kernel_core(V& psi, std::size_t I, std::size_t d0, std::size_t d1, std::size_t d2, std::size_t d3, std::size_t d4, std::size_t d5, std::size_t d6, M const& m) @@ -357,7 +357,7 @@ void kernel(V& psi, unsigned id6, unsigned id5, unsigned id4, unsigned id3, unsi #ifndef _MSC_VER if (ctrlmask == 0){ - #pragma omp parallel for collapse(LOOP_COLLAPSE7) schedule(static) + #pragma omp parallel for collapse(LOOP_COLLAPSE7) schedule(static) proc_bind(spread) for (std::size_t i0 = 0; i0 < n; i0 += 2 * dsorted[0]){ for (std::size_t i1 = 0; i1 < dsorted[0]; i1 += 2 * dsorted[1]){ for (std::size_t i2 = 0; i2 < dsorted[1]; i2 += 2 * dsorted[2]){ diff --git a/src/Simulation/Native/src/external/nointrin/kernel1.hpp b/src/Simulation/Native/src/external/nointrin/kernel1.hpp index 53840cd0904..015e7e9d227 100644 --- a/src/Simulation/Native/src/external/nointrin/kernel1.hpp +++ b/src/Simulation/Native/src/external/nointrin/kernel1.hpp @@ -43,7 +43,7 @@ void kernel(V& psi, unsigned id0, M const& matrix, std::size_t ctrlmask) #ifndef _MSC_VER if (ctrlmask == 0){ - #pragma omp parallel for collapse(LOOP_COLLAPSE1) schedule(static) + #pragma omp parallel for collapse(LOOP_COLLAPSE1) schedule(static) proc_bind(spread) for (std::size_t i0 = 0; i0 < n; i0 += 2 * dsorted[0]){ for (std::size_t i1 = 0; i1 < dsorted[0]; ++i1){ kernel_core(psi, i0 + i1, dsorted[0], mm); diff --git a/src/Simulation/Native/src/external/nointrin/kernel2.hpp b/src/Simulation/Native/src/external/nointrin/kernel2.hpp index 7ad58accb47..dcb47fe7f48 100644 --- a/src/Simulation/Native/src/external/nointrin/kernel2.hpp +++ b/src/Simulation/Native/src/external/nointrin/kernel2.hpp @@ -64,7 +64,7 @@ void kernel(V& psi, unsigned id1, unsigned id0, M const& matrix, std::size_t ctr #ifndef _MSC_VER if (ctrlmask == 0){ - #pragma omp parallel for collapse(LOOP_COLLAPSE2) schedule(static) + #pragma omp parallel for collapse(LOOP_COLLAPSE2) schedule(static) proc_bind(spread) for (std::size_t i0 = 0; i0 < n; i0 += 2 * dsorted[0]){ for (std::size_t i1 = 0; i1 < dsorted[0]; i1 += 2 * dsorted[1]){ for (std::size_t i2 = 0; i2 < dsorted[1]; ++i2){ diff --git a/src/Simulation/Native/src/external/nointrin/kernel3.hpp b/src/Simulation/Native/src/external/nointrin/kernel3.hpp index 6b714eb0df8..1019845187a 100644 --- a/src/Simulation/Native/src/external/nointrin/kernel3.hpp +++ b/src/Simulation/Native/src/external/nointrin/kernel3.hpp @@ -129,7 +129,7 @@ void kernel(V& psi, unsigned id2, unsigned id1, unsigned id0, M const& matrix, s #ifndef _MSC_VER if (ctrlmask == 0){ - #pragma omp parallel for collapse(LOOP_COLLAPSE3) schedule(static) + #pragma omp parallel for collapse(LOOP_COLLAPSE3) schedule(static) proc_bind(spread) for (std::size_t i0 = 0; i0 < n; i0 += 2 * dsorted[0]){ for (std::size_t i1 = 0; i1 < dsorted[0]; i1 += 2 * dsorted[1]){ for (std::size_t i2 = 0; i2 < dsorted[1]; i2 += 2 * dsorted[2]){ diff --git a/src/Simulation/Native/src/external/nointrin/kernel4.hpp b/src/Simulation/Native/src/external/nointrin/kernel4.hpp index f8b38ec3a92..46d33620e74 100644 --- a/src/Simulation/Native/src/external/nointrin/kernel4.hpp +++ b/src/Simulation/Native/src/external/nointrin/kernel4.hpp @@ -354,7 +354,7 @@ void kernel(V& psi, unsigned id3, unsigned id2, unsigned id1, unsigned id0, M co #ifndef _MSC_VER if (ctrlmask == 0){ - #pragma omp parallel for collapse(LOOP_COLLAPSE4) schedule(static) + #pragma omp parallel for collapse(LOOP_COLLAPSE4) schedule(static) proc_bind(spread) for (std::size_t i0 = 0; i0 < n; i0 += 2 * dsorted[0]){ for (std::size_t i1 = 0; i1 < dsorted[0]; i1 += 2 * dsorted[1]){ for (std::size_t i2 = 0; i2 < dsorted[1]; i2 += 2 * dsorted[2]){ diff --git a/src/Simulation/Native/src/external/nointrin/kernel5.hpp b/src/Simulation/Native/src/external/nointrin/kernel5.hpp index 59094e29042..08657104779 100644 --- a/src/Simulation/Native/src/external/nointrin/kernel5.hpp +++ b/src/Simulation/Native/src/external/nointrin/kernel5.hpp @@ -643,7 +643,7 @@ void kernel(V& psi, unsigned id4, unsigned id3, unsigned id2, unsigned id1, unsi #ifndef _MSC_VER if (ctrlmask == 0){ - #pragma omp parallel for collapse(LOOP_COLLAPSE5) schedule(static) + #pragma omp parallel for collapse(LOOP_COLLAPSE5) schedule(static) proc_bind(spread) for (std::size_t i0 = 0; i0 < n; i0 += 2 * dsorted[0]){ for (std::size_t i1 = 0; i1 < dsorted[0]; i1 += 2 * dsorted[1]){ for (std::size_t i2 = 0; i2 < dsorted[1]; i2 += 2 * dsorted[2]){ diff --git a/src/Simulation/Native/src/external/nointrin/kernel6.hpp b/src/Simulation/Native/src/external/nointrin/kernel6.hpp index 0fe55f34878..7f8ea4741a3 100644 --- a/src/Simulation/Native/src/external/nointrin/kernel6.hpp +++ b/src/Simulation/Native/src/external/nointrin/kernel6.hpp @@ -244,7 +244,7 @@ void kernel(V& psi, unsigned id5, unsigned id4, unsigned id3, unsigned id2, unsi #ifndef _MSC_VER if (ctrlmask == 0){ - #pragma omp parallel for collapse(LOOP_COLLAPSE6) schedule(static) + #pragma omp parallel for collapse(LOOP_COLLAPSE6) schedule(static) proc_bind(spread) for (std::size_t i0 = 0; i0 < n; i0 += 2 * dsorted[0]){ for (std::size_t i1 = 0; i1 < dsorted[0]; i1 += 2 * dsorted[1]){ for (std::size_t i2 = 0; i2 < dsorted[1]; i2 += 2 * dsorted[2]){ diff --git a/src/Simulation/Native/src/external/nointrin/kernel7.hpp b/src/Simulation/Native/src/external/nointrin/kernel7.hpp index 2a3809b134f..fc8401da66f 100644 --- a/src/Simulation/Native/src/external/nointrin/kernel7.hpp +++ b/src/Simulation/Native/src/external/nointrin/kernel7.hpp @@ -453,7 +453,7 @@ void kernel(V& psi, unsigned id6, unsigned id5, unsigned id4, unsigned id3, unsi #ifndef _MSC_VER if (ctrlmask == 0){ - #pragma omp parallel for collapse(LOOP_COLLAPSE7) schedule(static) + #pragma omp parallel for collapse(LOOP_COLLAPSE7) schedule(static) proc_bind(spread) for (std::size_t i0 = 0; i0 < n; i0 += 2 * dsorted[0]){ for (std::size_t i1 = 0; i1 < dsorted[0]; i1 += 2 * dsorted[1]){ for (std::size_t i2 = 0; i2 < dsorted[1]; i2 += 2 * dsorted[2]){