diff --git a/external/bits b/external/bits index 63ef155..f5498a8 160000 --- a/external/bits +++ b/external/bits @@ -1 +1 @@ -Subproject commit 63ef155259d823976ff3bad3f803afba5ab4573f +Subproject commit f5498a856f13e657d62c78afa22448c16d335972 diff --git a/include/partitioned_phf.hpp b/include/partitioned_phf.hpp index 8426e3d..ff5e251 100644 --- a/include/partitioned_phf.hpp +++ b/include/partitioned_phf.hpp @@ -94,7 +94,8 @@ struct partitioned_phf // m_num_keys = builder.num_keys(); m_table_size = builder.table_size(); m_partitioner = builder.bucketer(); - m_partitions.resize(num_partitions); + std::vector partitions; + partitions.resize(num_partitions); auto const& offsets = builder.offsets(); auto const& builders = builder.builders(); @@ -104,8 +105,8 @@ struct partitioned_phf // std::vector threads(num_threads); auto exe = [&](uint64_t begin, uint64_t end) { for (; begin != end; ++begin) { - m_partitions[begin].offset = offsets[begin]; - m_partitions[begin].f.build(builders[begin], config); + partitions[begin].offset = offsets[begin]; + partitions[begin].f.build(builders[begin], config); } }; @@ -123,11 +124,13 @@ struct partitioned_phf // } } else { for (uint64_t i = 0; i != num_partitions; ++i) { - m_partitions[i].offset = offsets[i]; - m_partitions[i].f.build(builders[i], config); + partitions[i].offset = offsets[i]; + partitions[i].f.build(builders[i], config); } } + m_partitions = std::move(partitions); + auto stop = clock_type::now(); return to_microseconds(stop - start); @@ -147,7 +150,7 @@ struct partitioned_phf // uint64_t num_bits_for_pilots() const { uint64_t bits = 8 * (sizeof(m_seed) + sizeof(m_num_keys) + sizeof(m_table_size) + - sizeof(uint64_t) // for std::vector::size + sizeof(uint64_t) // for span's size ) + m_partitioner.num_bits(); for (auto const& p : m_partitions) bits += 8 * sizeof(p.offset) + p.f.num_bits_for_pilots(); @@ -200,7 +203,7 @@ struct partitioned_phf // uint64_t m_num_keys; uint64_t m_table_size; range_bucketer 
m_partitioner; - std::vector m_partitions; + essentials::owning_span m_partitions; }; } // namespace pthash \ No newline at end of file diff --git a/include/utils/dense_encoders.hpp b/include/utils/dense_encoders.hpp index 422c26f..01e7ece 100644 --- a/include/utils/dense_encoders.hpp +++ b/include/utils/dense_encoders.hpp @@ -68,16 +68,17 @@ struct dense_interleaved : dense_encoder { const uint64_t num_partitions, // const uint64_t num_buckets_per_partition, const uint64_t num_threads) // { - m_encoders.resize(num_buckets_per_partition); + std::vector encoders; + encoders.resize(num_buckets_per_partition); if (num_threads == 1) { for (uint64_t i = 0; i != num_buckets_per_partition; ++i) { - m_encoders[i].encode(begin + i * num_partitions, num_partitions); + encoders[i].encode(begin + i * num_partitions, num_partitions); } } else { auto exe = [&](uint64_t beginEncoder, uint64_t endEncoder) { for (; beginEncoder != endEncoder; ++beginEncoder) { - m_encoders[beginEncoder].encode(begin + beginEncoder * num_partitions, - num_partitions); + encoders[beginEncoder].encode(begin + beginEncoder * num_partitions, + num_partitions); } }; @@ -97,6 +98,7 @@ struct dense_interleaved : dense_encoder { if (t.joinable()) t.join(); } } + m_encoders = std::move(encoders); } static std::string name() { @@ -109,7 +111,7 @@ struct dense_interleaved : dense_encoder { } uint64_t num_bits() const { - uint64_t sum = 8 * sizeof(uint64_t); // for std::vector size + uint64_t sum = 8 * sizeof(uint64_t); // for span's size for (auto const& e : m_encoders) sum += e.num_bits(); return sum; } @@ -130,7 +132,7 @@ struct dense_interleaved : dense_encoder { visitor.visit(t.m_encoders); } - std::vector m_encoders; + essentials::owning_span m_encoders; }; typedef dense_mono C_mono; diff --git a/include/utils/encoders.hpp b/include/utils/encoders.hpp index b55f2ba..a2ff85e 100644 --- a/include/utils/encoders.hpp +++ b/include/utils/encoders.hpp @@ -58,8 +58,9 @@ struct partitioned_compact { uint64_t 
num_partitions = (n + partition_size - 1) / partition_size; bits::bit_vector::builder bvb; bvb.reserve(32 * n); - m_bits_per_value.reserve(num_partitions + 1); - m_bits_per_value.push_back(0); + std::vector bits_per_value; + bits_per_value.reserve(num_partitions + 1); + bits_per_value.push_back(0); for (uint64_t i = 0, begin_partition = 0; i != num_partitions; ++i) { uint64_t end_partition = begin_partition + partition_size; if (end_partition > n) end_partition = n; @@ -76,10 +77,11 @@ struct partitioned_compact { for (uint64_t k = begin_partition; k != end_partition; ++k) { bvb.append_bits(*(begin + k), num_bits); } - assert(m_bits_per_value.back() + num_bits < (1ULL << 32)); - m_bits_per_value.push_back(m_bits_per_value.back() + num_bits); + assert(bits_per_value.back() + num_bits < (1ULL << 32)); + bits_per_value.push_back(bits_per_value.back() + num_bits); begin_partition = end_partition; } + m_bits_per_value = std::move(bits_per_value); bvb.build(m_values); } @@ -123,7 +125,7 @@ struct partitioned_compact { } uint64_t m_size; - std::vector m_bits_per_value; + essentials::owning_span m_bits_per_value; bits::bit_vector m_values; }; diff --git a/src/example.cpp b/src/example.cpp index a33cf84..c9a4c2c 100644 --- a/src/example.cpp +++ b/src/example.cpp @@ -8,7 +8,7 @@ int main() { /* Generate 1M random 64-bit keys as input data. */ static const uint64_t num_keys = 1'000'000; - static const uint64_t seed = 1234567890; + static const uint64_t seed = essentials::get_random_seed(); std::cout << "generating input data..." << std::endl; auto keys = distinct_uints(num_keys, seed); // Can also use: @@ -78,8 +78,23 @@ int main() { { /* Now reload from disk and query. */ + std::cout << "loading the function from disk..." << std::endl; pthash_type other; essentials::load(other, output_filename.c_str()); + if (check(keys.begin(), other)) std::cout << "EVERYTHING OK!" 
<< std::endl; + const uint64_t n = std::min(10, keys.size()); + for (uint64_t i = 0; i != n; ++i) { + std::cout << "f(" << keys[i] << ") = " << other(keys[i]) << '\n'; + assert(f(keys[i]) == other(keys[i])); + } + } + + { + /* Now mmap from disk and query. */ + std::cout << "mapping the function from disk..." << std::endl; + pthash_type other; + essentials::mmap(other, output_filename.c_str()); + if (check(keys.begin(), other)) std::cout << "EVERYTHING OK!" << std::endl; const uint64_t n = std::min(10, keys.size()); for (uint64_t i = 0; i != n; ++i) { std::cout << "f(" << keys[i] << ") = " << other(keys[i]) << '\n'; @@ -89,4 +104,4 @@ int main() { std::remove(output_filename.c_str()); return 0; -} \ No newline at end of file +}