diff --git a/src/Simulation/Native/CMakeSettings.json b/src/Simulation/Native/CMakeSettings.json new file mode 100644 index 00000000000..ee45e8257c1 --- /dev/null +++ b/src/Simulation/Native/CMakeSettings.json @@ -0,0 +1,28 @@ +{ + "configurations": [ + { + "name": "x64-Debug", + "generator": "Ninja", + "configurationType": "Debug", + "inheritEnvironments": [ "msvc_x64_x64" ], + "buildRoot": "${projectDir}\\out\\build\\${name}", + "installRoot": "${projectDir}\\out\\install\\${name}", + "cmakeCommandArgs": "", + "buildCommandArgs": "", + "ctestCommandArgs": "", + "variables": [] + }, + { + "name": "x64-Release", + "generator": "Ninja", + "configurationType": "RelWithDebInfo", + "buildRoot": "${projectDir}\\out\\build\\${name}", + "installRoot": "${projectDir}\\out\\install\\${name}", + "cmakeCommandArgs": "", + "buildCommandArgs": "", + "ctestCommandArgs": "", + "inheritEnvironments": [ "msvc_x64_x64" ], + "variables": [] + } + ] +} \ No newline at end of file diff --git a/src/Simulation/Native/src/external/fused.hpp b/src/Simulation/Native/src/external/fused.hpp index 235cf8ed15c..f1c7ac49e68 100644 --- a/src/Simulation/Native/src/external/fused.hpp +++ b/src/Simulation/Native/src/external/fused.hpp @@ -41,7 +41,14 @@ class Fused fusedgates = Fusion(); } + const Fusion& get_fusedgates() const { + return fusedgates; + } + void set_fusedgates(Fusion newFusedGates) const { + fusedgates = newFusedGates; + } + template void flush(std::vector& wfn) const { @@ -79,16 +86,6 @@ class Fused fusedgates = Fusion(); } - template - bool subsytemwavefunction(std::vector& wfn, - std::vector const& qs, - std::vector& qubitswfn, - double tolerance) - { - flush(wfn); // we have to flush before we can extract the state - return kernels::subsytemwavefunction(wfn, qs, qubitswfn, tolerance); - } - template Fusion::Matrix convertMatrix(M const& m) { @@ -102,11 +99,25 @@ class Fused template void apply_controlled(std::vector& wfn, M const& mat, std::vector const& cs, unsigned q) { - // 
Major runtime logic change here + Fusion::IndexVector qs = std::vector(1, q); + fusedgates.insert(convertMatrix(mat), qs, cs); + } - // Have to update capacity as the WFN grows + template + void apply(std::vector& wfn, M const& mat, unsigned q) + { + std::vector cs; + apply_controlled(wfn, mat, cs, q); + } + + template + bool shouldFlush(std::vector& wfn, std::vector const& cs, unsigned q) + { + // Major runtime logic change here + + // Have to update capacity as the WFN grows if (wfnCapacity != wfn.capacity()) { - wfnCapacity = wfn.capacity(); + wfnCapacity = wfn.capacity(); char* envNT = NULL; size_t len; #ifdef _MSC_VER @@ -133,16 +144,9 @@ class Fused } // New rules of when to stop fusing - Fusion::IndexVector qs = std::vector(1, q); - if (fusedgates.predict(qs, cs) > maxFusedSpan || fusedgates.size() >= maxFusedDepth) flush(wfn); - fusedgates.insert(convertMatrix(mat), qs, cs); - } + Fusion::IndexVector qs = std::vector(1, q); - template - void apply(std::vector& wfn, M const& mat, unsigned q) - { - std::vector cs; - apply_controlled(wfn, mat, cs, q); + return (fusedgates.predict(qs, cs) > maxFusedSpan || fusedgates.size() >= maxFusedDepth); } private: mutable Fusion fusedgates; diff --git a/src/Simulation/Native/src/external/fusion.hpp b/src/Simulation/Native/src/external/fusion.hpp index f3224f79212..f89471a1e88 100644 --- a/src/Simulation/Native/src/external/fusion.hpp +++ b/src/Simulation/Native/src/external/fusion.hpp @@ -10,6 +10,7 @@ #include #include #include "util/alignedalloc.hpp" +#include class Item{ public: @@ -17,14 +18,20 @@ class Item{ using IndexVector = std::vector; using Complex = std::complex; using Matrix = std::vector>>; - Item(Matrix mat, IndexVector idx) : mat_(mat), idx_(idx) {} + Item(Matrix mat, IndexVector idx) : mat_(std::move(mat)), idx_(idx) {} Matrix& get_matrix() { return mat_; } - IndexVector& get_indices() { return idx_; } + IndexVector& get_indices() const { return idx_; } + void remap_idx(std::unordered_map elemDict) const 
{ + for (size_t i = 0; i < idx_.size(); i++) { + idx_[i] = elemDict[idx_[i]]; + } + } private: Matrix mat_; - IndexVector idx_; + mutable IndexVector idx_; }; +// Class handling the fusion of gates class Fusion{ public: using Index = unsigned; @@ -37,7 +44,7 @@ class Fusion{ Fusion() : global_factor_(1.) {} Index num_qubits() const { - return static_cast(set_.size()); + return static_cast(target_set_.size()); } Index num_controls() const { @@ -58,21 +65,58 @@ class Fusion{ handle_controls(empty_matrix, empty_vec, {}); // remove all current control qubits (this is a GLOBAL factor) } + const IndexSet& get_target_set() const { + return target_set_; + } + + const ItemVector& get_items() const { + return items_; + } + + const IndexSet& get_ctrl_set() const { + return ctrl_set_; + } + + const Complex& get_global_factor() const { + return global_factor_; + } + + static void remap_qubits(std::set& qubits, const std::unordered_map& mapFromOldLocToNewLoc) { + std::set tempSet; + for (unsigned elem : qubits) { + if (mapFromOldLocToNewLoc.find(elem) != mapFromOldLocToNewLoc.end()) { + tempSet.insert(mapFromOldLocToNewLoc.at(elem)); + } + } + qubits.swap(tempSet); + } + + void remap_target_set(const std::unordered_map& mapFromOldLocToNewLoc) const { + remap_qubits(target_set_, mapFromOldLocToNewLoc); + } + + void remap_ctrl_set(const std::unordered_map& mapFromOldLocToNewLoc) const { + remap_qubits(ctrl_set_, mapFromOldLocToNewLoc); + } + + void set_items(ItemVector&& newItems) { + items_.swap(newItems); + } // This saves a class instance create/destroy on every gate insert // Need a quick way to decide if we're going to grow too wide int predict(IndexVector index_list, IndexVector const& ctrl_list = {}) { int cnt = num_qubits() + num_controls(); for (auto idx : index_list) - if (set_.count(idx) == 0 && ctrl_set_.count(idx) == 0) cnt++; + if (target_set_.count(idx) == 0 && ctrl_set_.count(idx) == 0) cnt++; for (auto idx : ctrl_list) - if (set_.count(idx) == 0 && 
ctrl_set_.count(idx) == 0) cnt++; + if (target_set_.count(idx) == 0 && ctrl_set_.count(idx) == 0) cnt++; return cnt; } void insert(Matrix matrix, IndexVector index_list, IndexVector const& ctrl_list = {}){ for (auto idx : index_list) - set_.emplace(idx); + target_set_.emplace(idx); if (global_factor_ != 1. && ctrl_list.size() > 0){ assert(ctrl_set_.size() == 0); @@ -85,7 +129,7 @@ class Fusion{ } void get_indices(IndexVector &indices) const{ - for (auto idx : set_) + for (auto idx : target_set_) indices.push_back(idx); } @@ -93,7 +137,7 @@ class Fusion{ if (global_factor_ != 1.) assert(ctrl_set_.size() == 0); - for (auto idx : set_) + for (auto idx : target_set_) index_list.push_back(idx); unsigned N = num_qubits(); @@ -167,7 +211,7 @@ class Fusion{ if (ctrl_set_.count(ctrlIdx) == 0){ // need to either add it to the list or to the command if (items_.size() > 0){ // add it to the command add_controls(matrix, indexList, {ctrlIdx}); - set_.insert(ctrlIdx); + target_set_.insert(ctrlIdx); } else // add it to the list ctrl_set_.emplace(ctrlIdx); @@ -183,17 +227,17 @@ class Fusion{ for (auto idx : unhandled_ctrl){ new_ctrls.push_back(idx); ctrl_set_.erase(idx); - set_.insert(idx); + target_set_.insert(idx); } for (auto &item : items_) add_controls(item.get_matrix(), item.get_indices(), new_ctrls); } } - IndexSet set_; - ItemVector items_; - IndexSet ctrl_set_; - Complex global_factor_; + mutable IndexSet target_set_; //set of qubits being acted on + mutable ItemVector items_; //queue of gates to be fused + mutable IndexSet ctrl_set_; //set of controls + mutable Complex global_factor_; }; #endif diff --git a/src/Simulation/Native/src/external/nointrin/kernel1.hpp b/src/Simulation/Native/src/external/nointrin/kernel1.hpp index 015e7e9d227..5173b58d8a8 100644 --- a/src/Simulation/Native/src/external/nointrin/kernel1.hpp +++ b/src/Simulation/Native/src/external/nointrin/kernel1.hpp @@ -60,20 +60,20 @@ void kernel(V& psi, unsigned id0, M const& matrix, std::size_t ctrlmask) } 
} #else - std::intptr_t zero = 0; - std::intptr_t dmask = dsorted[0]; + std::intptr_t zero = 0; + std::intptr_t dmask = dsorted[0]; - if (ctrlmask == 0){ - #pragma omp parallel for schedule(static) - for (std::intptr_t i = 0; i < static_cast(n); ++i) - if ((i & dmask) == zero) - kernel_core(psi, i, dsorted[0], mm); - } else { - #pragma omp parallel for schedule(static) - for (std::intptr_t i = 0; i < static_cast(n); ++i) - if ((i & ctrlmask) == ctrlmask && (i & dmask) == zero) - kernel_core(psi, i, dsorted[0], mm); - } + if (ctrlmask == 0){ + #pragma omp parallel for schedule(static) + for (std::intptr_t i = 0; i < static_cast(n); ++i) + if ((i & dmask) == zero) + kernel_core(psi, i, dsorted[0], mm); + } else { + #pragma omp parallel for schedule(static) + for (std::intptr_t i = 0; i < static_cast(n); ++i) + if ((i & ctrlmask) == ctrlmask && (i & dmask) == zero) + kernel_core(psi, i, dsorted[0], mm); + } #endif } diff --git a/src/Simulation/Native/src/simulator/capi_test.cpp b/src/Simulation/Native/src/simulator/capi_test.cpp index feb725db2fa..4a686291619 100644 --- a/src/Simulation/Native/src/simulator/capi_test.cpp +++ b/src/Simulation/Native/src/simulator/capi_test.cpp @@ -107,24 +107,22 @@ void test_gates() allocateQubit(sim_id, 0); allocateQubit(sim_id, 1); - CRx(sim_id, 1.0, 0, 1); + CRx(sim_id, 1.0, 0, 1); - assert(M(sim_id, 1)==false); + assert(M(sim_id, 1) == false); X(sim_id, 0); - CRx(sim_id, 1.0, 0, 1); + CRx(sim_id, 1.0, 0, 1); H(sim_id, 1); CRx(sim_id, -1.0, 0, 1); H(sim_id, 1); - assert(M(sim_id, 1)==false); + assert(M(sim_id, 1) == false); X(sim_id, 1); - assert(M(sim_id, 1)==true); - - X(sim_id, 1); + assert(M(sim_id, 1) == true); release(sim_id, 0); release(sim_id, 1); @@ -132,7 +130,6 @@ void test_gates() destroy(sim_id); } - void test_allocate() { auto sim_id = init(); diff --git a/src/Simulation/Native/src/simulator/wavefunction.hpp b/src/Simulation/Native/src/simulator/wavefunction.hpp index 1d3a390642c..bb9fdae21bf 100644 --- 
a/src/Simulation/Native/src/simulator/wavefunction.hpp +++ b/src/Simulation/Native/src/simulator/wavefunction.hpp @@ -12,6 +12,7 @@ #include #include #include +#include #include "types.hpp" #include "gates.hpp" @@ -90,6 +91,50 @@ class Wavefunction void flush() const { + // logic to reorder + const Fusion& fg = fused_.get_fusedgates(); + const auto& itemsToFuse = fg.get_items(); + const auto& ctrlSet = fg.get_ctrl_set(); + // getting all qubits to move to lower end of the wfn + if (!itemsToFuse.empty()) { + std::vector unionOfAllQubitsInUse; + std::unordered_set indicesSet; //set is introduced to guard against duplicate insertion while maintaining the original order + for (int i = 0; i < itemsToFuse.size(); i++) { + const auto& tempIndices = itemsToFuse[i].get_indices(); + for (unsigned j = 0; j < tempIndices.size(); j++) { + if (indicesSet.count(tempIndices[j]) == 0) { + unionOfAllQubitsInUse.push_back(tempIndices[j]); + indicesSet.insert(tempIndices[j]); + } + } + } + for (unsigned index : ctrlSet) { + if (indicesSet.count(index) == 0) { + unionOfAllQubitsInUse.push_back(index); + indicesSet.insert(index); + } + } + // performing reorder + std::vector indexLocs = qubits(unionOfAllQubitsInUse); + for (unsigned i = 0; i < indexLocs.size(); i++) + { + auto currLoc = indexLocs[i]; + reorder_wavefunction(currLoc, i); + indexLocs = qubits(unionOfAllQubitsInUse); + } + // keeping old and new location in order to set it appropriately + std::unordered_map old2newDict; + for (unsigned i = 0; i < unionOfAllQubitsInUse.size(); i++) { + old2newDict[unionOfAllQubitsInUse[i]] = indexLocs[i]; + } + + for (int i = 0; i < itemsToFuse.size(); i++) { + itemsToFuse[i].remap_idx(old2newDict); + } + fg.remap_target_set(old2newDict); + fg.remap_ctrl_set(old2newDict); + } + fused_.flush(wfn_); } @@ -134,7 +179,7 @@ class Wavefunction /// \pre the qubit has to be in a classical state in the computational basis void release(qubit_t q) { - unsigned p = qubit(q); + unsigned p = qubit(q); //returns 
qubitmap_[q] flush(); kernels::collapse(wfn_, p, getvalue(q), true); for (int i = 0; i < qubitmap_.size(); ++i) @@ -238,19 +283,53 @@ class Wavefunction rng_.seed(s); } + void reorder_wavefunction(unsigned qubitLoc, unsigned newPos) const + { + // swap qubits in wfn between qubitLoc and newPos + if (newPos != qubitLoc) + { + for (std::size_t i = 0ull; i < wfn_.size(); i++) + { + std::size_t bit1 = (i >> qubitLoc) & 1ull; + std::size_t bit2 = (i >> newPos) & 1ull; + std::size_t x = (bit1 ^ bit2); + x = (x << qubitLoc) | (x << newPos); + std::size_t new_i = i ^ x; + if (new_i > i) + { + std::iter_swap(wfn_.begin() + i, wfn_.begin() + new_i); + } + } + // get id of qubit located at newPos and qubitLoc - getting index from the element + auto newQubitLocItr = std::find(qubitmap_.begin(), qubitmap_.end(), newPos); + assert(newQubitLocItr != qubitmap_.end()); + auto origQubitLocItr = std::find(qubitmap_.begin(), qubitmap_.end(), qubitLoc); + assert(origQubitLocItr != qubitmap_.end()); + // swap elements in qubitmap located at iterators + std::iter_swap(origQubitLocItr, newQubitLocItr); + } + } + /// generic application of a gate template void apply(Gate const& g) { - fused_.apply(wfn_, g.matrix(), qubit(g)); + //check flush condition + if (fused_.shouldFlush(wfn_, std::vector{}, g.qubit())) { + flush(); + } + fused_.apply(wfn_, g.matrix(), g.qubit()); } - + /// generic application of a multiply controlled gate template void apply_controlled(std::vector cs, Gate const& g) { std::vector pcs = qubits(cs); - fused_.apply_controlled(wfn_, g.matrix(), pcs, qubit(g)); + if (fused_.shouldFlush(wfn_, cs, g.qubit())) { + flush(); + } + fused_.apply_controlled(wfn_, g.matrix(), cs, g.qubit()); } /// generic application of a controlled gate @@ -274,7 +353,8 @@ class Wavefunction template bool subsytemwavefunction(std::vector const& qs, std::vector& qubitswfn, double tolerance) { - return fused_.subsytemwavefunction(wfn_, qubits(qs), qubitswfn, tolerance); + flush(); // we have to 
flush before we can extract the state + return kernels::subsytemwavefunction(wfn_, qubits(qs), qubitswfn, tolerance); } @@ -338,7 +418,7 @@ class Wavefunction private: unsigned num_qubits_; // for convenience mutable WavefunctionStorage wfn_; // storing the wave function - std::vector qubitmap_; // mapping of logical to physical qubits + mutable std::vector qubitmap_; // mapping of logical to physical qubits int usage_; // randomness support