Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 17 additions & 16 deletions include/build_util.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -145,53 +145,53 @@ void build_reference_sketches(index_type const& index,
out.close();
}

template <typename Iterator>
template <typename Index>
void build_colors_sketches_sliced(
uint64_t num_colors, uint64_t num_color_sets, function<Iterator(uint64_t)> colors,
Index const& index,
uint64_t p, // use 2^p bytes per HLL sketch
uint64_t num_threads, // num. threads for construction
std::string output_filename, // where the sketches will be serialized
double left, double right) //
{
assert(num_threads > 0);

const uint64_t num_colors = index.num_colors();
const uint64_t num_color_sets = index.num_color_sets();

const double min_size = left * num_colors;
const double max_size = right * num_colors;
assert(min_size >= 0);
assert(max_size <= num_colors);

if (num_color_sets < num_threads) { num_threads = num_color_sets; }

std::vector<Iterator> filtered_colors;
uint64_t load = 0;
std::vector<uint64_t> filtered_colors_ids;
filtered_colors_ids.reserve(num_color_sets);
for (uint64_t color_id = 0; color_id != num_color_sets; ++color_id) {
auto it = colors(color_id);
auto it = index.color_set(color_id);
uint64_t size = it.size();
if (size > min_size && size <= max_size) {
filtered_colors.push_back(it);
load += size;
filtered_colors_ids.push_back(color_id);
}
}
const uint64_t partition_size = filtered_colors.size();
const uint64_t partition_size = filtered_colors_ids.size();

// Half-open range [begin, end) of positions in the filtered color-set list;
// one such range is stored per thread in `thread_slices`.
struct slice {
    uint64_t begin;  // first position covered by the range (inclusive)
    uint64_t end;    // one past the last position covered (exclusive)
};
std::vector<slice> thread_slices;

uint64_t load = 0;
{
for (auto it : filtered_colors) { load += it.size(); }
}

uint64_t load_per_thread = load / num_threads;
{
slice s;
s.begin = 0;
uint64_t curr_load = 0;

for (uint64_t i = 0; i != partition_size; ++i) {
auto it = filtered_colors[i];
auto color_id = filtered_colors_ids[i];
auto it = index.color_set(color_id);
curr_load += it.size();
if (curr_load >= load_per_thread || i == partition_size - 1) {
s.end = i + 1;
Expand All @@ -211,14 +211,15 @@ void build_colors_sketches_sliced(
auto s = thread_slices[thread_id];
sketches = std::vector<sketch::hll_t>(s.end - s.begin, sketch::hll_t(p));

for (uint64_t color_id = s.begin; color_id != s.end; ++color_id) {
auto it = filtered_colors[color_id];
for (uint64_t i = s.begin; i != s.end; ++i) {
auto color_id = filtered_colors_ids[i];
auto it = index.color_set(color_id);
const uint64_t size = it.size();
assert(size > 0);
for (uint64_t i = 0; i < size; ++i, ++it) {
for (uint64_t j = 0; j < size; ++j, ++it) {
uint64_t ref_id = *it;
assert(ref_id < num_colors);
sketches[color_id - s.begin].addh(ref_id);
sketches[i - s.begin].addh(ref_id);
}
}
};
Expand Down
8 changes: 2 additions & 6 deletions include/builders/differential_builder.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,8 @@ struct differential_permuter {
constexpr uint64_t p = 10;
for (uint64_t slice_id = 0; slice_id != num_slices; slice_id++) {
timer.start();
build_colors_sketches_sliced<hybrid::forward_iterator>(
index.num_colors(), index.num_color_sets(),
[&](uint64_t color_set_id) -> hybrid::forward_iterator {
return index.color_set(color_set_id);
},
p, m_build_config.num_threads,
build_colors_sketches_sliced(
index, p, m_build_config.num_threads,
m_build_config.tmp_dirname + "/sketches" + std::to_string(slice_id) + ".bin",
slices[slice_id], slices[slice_id + 1]);
timer.stop();
Expand Down
Loading