Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 10 additions & 7 deletions include/partitioned_phf.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,8 @@ struct partitioned_phf //
m_num_keys = builder.num_keys();
m_table_size = builder.table_size();
m_partitioner = builder.bucketer();
m_partitions.resize(num_partitions);
std::vector<partition> partitions;
partitions.resize(num_partitions);

auto const& offsets = builder.offsets();
auto const& builders = builder.builders();
Expand All @@ -104,8 +105,8 @@ struct partitioned_phf //
std::vector<std::thread> threads(num_threads);
auto exe = [&](uint64_t begin, uint64_t end) {
for (; begin != end; ++begin) {
m_partitions[begin].offset = offsets[begin];
m_partitions[begin].f.build(builders[begin], config);
partitions[begin].offset = offsets[begin];
partitions[begin].f.build(builders[begin], config);
}
};

Expand All @@ -123,11 +124,13 @@ struct partitioned_phf //
}
} else {
for (uint64_t i = 0; i != num_partitions; ++i) {
m_partitions[i].offset = offsets[i];
m_partitions[i].f.build(builders[i], config);
partitions[i].offset = offsets[i];
partitions[i].f.build(builders[i], config);
}
}

m_partitions = std::move(partitions);

auto stop = clock_type::now();

return to_microseconds(stop - start);
Expand All @@ -147,7 +150,7 @@ struct partitioned_phf //

uint64_t num_bits_for_pilots() const {
uint64_t bits = 8 * (sizeof(m_seed) + sizeof(m_num_keys) + sizeof(m_table_size) +
sizeof(uint64_t) // for std::vector::size
sizeof(uint64_t) // for span's size
) +
m_partitioner.num_bits();
for (auto const& p : m_partitions) bits += 8 * sizeof(p.offset) + p.f.num_bits_for_pilots();
Expand Down Expand Up @@ -200,7 +203,7 @@ struct partitioned_phf //
uint64_t m_num_keys;
uint64_t m_table_size;
range_bucketer m_partitioner;
std::vector<partition> m_partitions;
essentials::owning_span<partition> m_partitions;
};

} // namespace pthash
14 changes: 8 additions & 6 deletions include/utils/dense_encoders.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,16 +68,17 @@ struct dense_interleaved : dense_encoder {
const uint64_t num_partitions, //
const uint64_t num_buckets_per_partition, const uint64_t num_threads) //
{
m_encoders.resize(num_buckets_per_partition);
std::vector<Encoder> encoders;
encoders.resize(num_buckets_per_partition);
if (num_threads == 1) {
for (uint64_t i = 0; i != num_buckets_per_partition; ++i) {
m_encoders[i].encode(begin + i * num_partitions, num_partitions);
encoders[i].encode(begin + i * num_partitions, num_partitions);
}
} else {
auto exe = [&](uint64_t beginEncoder, uint64_t endEncoder) {
for (; beginEncoder != endEncoder; ++beginEncoder) {
m_encoders[beginEncoder].encode(begin + beginEncoder * num_partitions,
num_partitions);
encoders[beginEncoder].encode(begin + beginEncoder * num_partitions,
num_partitions);
}
};

Expand All @@ -97,6 +98,7 @@ struct dense_interleaved : dense_encoder {
if (t.joinable()) t.join();
}
}
m_encoders = std::move(encoders);
}

static std::string name() {
Expand All @@ -109,7 +111,7 @@ struct dense_interleaved : dense_encoder {
}

// Returns a size in bits: 64 bits (8 * sizeof(uint64_t)) accounting for the
// stored element count, plus the sum of num_bits() over every encoder held
// in m_encoders.
uint64_t num_bits() const {
uint64_t sum = 8 * sizeof(uint64_t); // for std::vector size
uint64_t sum = 8 * sizeof(uint64_t); // for span's size
for (auto const& e : m_encoders) sum += e.num_bits();
return sum;
}
Expand All @@ -130,7 +132,7 @@ struct dense_interleaved : dense_encoder {
visitor.visit(t.m_encoders);
}

std::vector<Encoder> m_encoders;
essentials::owning_span<Encoder> m_encoders;
};

typedef dense_mono<compact> C_mono;
Expand Down
12 changes: 7 additions & 5 deletions include/utils/encoders.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,9 @@ struct partitioned_compact {
uint64_t num_partitions = (n + partition_size - 1) / partition_size;
bits::bit_vector::builder bvb;
bvb.reserve(32 * n);
m_bits_per_value.reserve(num_partitions + 1);
m_bits_per_value.push_back(0);
std::vector<uint32_t> bits_per_value;
bits_per_value.reserve(num_partitions + 1);
bits_per_value.push_back(0);
for (uint64_t i = 0, begin_partition = 0; i != num_partitions; ++i) {
uint64_t end_partition = begin_partition + partition_size;
if (end_partition > n) end_partition = n;
Expand All @@ -76,10 +77,11 @@ struct partitioned_compact {
for (uint64_t k = begin_partition; k != end_partition; ++k) {
bvb.append_bits(*(begin + k), num_bits);
}
assert(m_bits_per_value.back() + num_bits < (1ULL << 32));
m_bits_per_value.push_back(m_bits_per_value.back() + num_bits);
assert(bits_per_value.back() + num_bits < (1ULL << 32));
bits_per_value.push_back(bits_per_value.back() + num_bits);
begin_partition = end_partition;
}
m_bits_per_value = std::move(bits_per_value);
bvb.build(m_values);
}

Expand Down Expand Up @@ -123,7 +125,7 @@ struct partitioned_compact {
}

uint64_t m_size;
std::vector<uint32_t> m_bits_per_value;
essentials::owning_span<uint32_t> m_bits_per_value;
bits::bit_vector m_values;
};

Expand Down
19 changes: 17 additions & 2 deletions src/example.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ int main() {

/* Generate 1M random 64-bit keys as input data. */
static const uint64_t num_keys = 1'000'000;
static const uint64_t seed = 1234567890;
static const uint64_t seed = essentials::get_random_seed();
std::cout << "generating input data..." << std::endl;
auto keys = distinct_uints<uint64_t>(num_keys, seed);
// Can also use:
Expand Down Expand Up @@ -78,8 +78,23 @@ int main() {

{
/* Now reload from disk and query. */
std::cout << "loading the function from disk..." << std::endl;
pthash_type other;
essentials::load(other, output_filename.c_str());
if (check(keys.begin(), other)) std::cout << "EVERYTHING OK!" << std::endl;
const uint64_t n = std::min<uint64_t>(10, keys.size());
for (uint64_t i = 0; i != n; ++i) {
std::cout << "f(" << keys[i] << ") = " << other(keys[i]) << '\n';
assert(f(keys[i]) == other(keys[i]));
}
}

{
/* Now mmap from disk and query. */
std::cout << "mapping the function from disk..." << std::endl;
pthash_type other;
essentials::mmap(other, output_filename.c_str());
if (check(keys.begin(), other)) std::cout << "EVERYTHING OK!" << std::endl;
const uint64_t n = std::min<uint64_t>(10, keys.size());
for (uint64_t i = 0; i != n; ++i) {
std::cout << "f(" << keys[i] << ") = " << other(keys[i]) << '\n';
Expand All @@ -89,4 +104,4 @@ int main() {

std::remove(output_filename.c_str());
return 0;
}
}
Loading