Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
114 changes: 57 additions & 57 deletions be/src/vec/common/hash_table/hash_table.h
Original file line number Diff line number Diff line change
Expand Up @@ -421,63 +421,6 @@ class HashTable : private boost::noncopyable,
}
}

/// Increase the size of the buffer.
void resize(size_t for_num_elems = 0, size_t for_buf_size = 0) {
SCOPED_RAW_TIMER(&_resize_timer_ns);
#ifdef DBMS_HASH_MAP_DEBUG_RESIZES
Stopwatch watch;
#endif

size_t old_size = grower.buf_size();

/** In case of exception for the object to remain in the correct state,
* changing the variable `grower` (which determines the buffer size of the hash table)
* is postponed for a moment after a real buffer change.
* The temporary variable `new_grower` is used to determine the new size.
*/
Grower new_grower = grower;
if (for_num_elems) {
new_grower.set(for_num_elems);
if (new_grower.buf_size() <= old_size) return;
} else if (for_buf_size) {
new_grower.set_buf_size(for_buf_size);
if (new_grower.buf_size() <= old_size) return;
} else
new_grower.increase_size();

/// Expand the space.
buf = reinterpret_cast<Cell*>(Allocator::realloc(buf, get_buffer_size_in_bytes(),
new_grower.buf_size() * sizeof(Cell)));
grower = new_grower;

/** Now some items may need to be moved to a new location.
* The element can stay in place, or move to a new location "on the right",
* or move to the left of the collision resolution chain, because the elements to the left of it have been moved to the new "right" location.
*/
size_t i = 0;
for (; i < old_size; ++i)
if (!buf[i].is_zero(*this) && !buf[i].is_deleted())
reinsert(buf[i], buf[i].get_hash(*this));

/** There is also a special case:
* if the element was to be at the end of the old buffer, [ x]
* but is at the beginning because of the collision resolution chain, [o x]
* then after resizing, it will first be out of place again, [ xo ]
* and in order to transfer it where necessary,
* after transferring all the elements from the old halves you need to [ o x ]
* process tail from the collision resolution chain immediately after it [ o x ]
*/
for (; !buf[i].is_zero(*this) && !buf[i].is_deleted(); ++i)
reinsert(buf[i], buf[i].get_hash(*this));

#ifdef DBMS_HASH_MAP_DEBUG_RESIZES
watch.stop();
std::cerr << std::fixed << std::setprecision(3) << "Resize from " << old_size << " to "
<< grower.buf_size() << " took " << watch.elapsedSeconds() << " sec."
<< std::endl;
#endif
}

/** Paste into the new buffer the value that was in the old buffer.
* Used when increasing the buffer size.
*/
Expand Down Expand Up @@ -613,6 +556,63 @@ class HashTable : private boost::noncopyable,
return *this;
}

/// Increase the size of the buffer.
void resize(size_t for_num_elems = 0, size_t for_buf_size = 0) {
SCOPED_RAW_TIMER(&_resize_timer_ns);
#ifdef DBMS_HASH_MAP_DEBUG_RESIZES
Stopwatch watch;
#endif

size_t old_size = grower.buf_size();

/** In case of exception for the object to remain in the correct state,
* changing the variable `grower` (which determines the buffer size of the hash table)
* is postponed for a moment after a real buffer change.
* The temporary variable `new_grower` is used to determine the new size.
*/
Grower new_grower = grower;
if (for_num_elems) {
new_grower.set(for_num_elems);
if (new_grower.buf_size() <= old_size) return;
} else if (for_buf_size) {
new_grower.set_buf_size(for_buf_size);
if (new_grower.buf_size() <= old_size) return;
} else
new_grower.increase_size();

/// Expand the space.
buf = reinterpret_cast<Cell*>(Allocator::realloc(buf, get_buffer_size_in_bytes(),
new_grower.buf_size() * sizeof(Cell)));
grower = new_grower;

/** Now some items may need to be moved to a new location.
* The element can stay in place, or move to a new location "on the right",
* or move to the left of the collision resolution chain, because the elements to the left of it have been moved to the new "right" location.
*/
size_t i = 0;
for (; i < old_size; ++i)
if (!buf[i].is_zero(*this) && !buf[i].is_deleted())
reinsert(buf[i], buf[i].get_hash(*this));

/** There is also a special case:
* if the element was to be at the end of the old buffer, [ x]
* but is at the beginning because of the collision resolution chain, [o x]
* then after resizing, it will first be out of place again, [ xo ]
* and in order to transfer it where necessary,
* after transferring all the elements from the old halves you need to [ o x ]
* process tail from the collision resolution chain immediately after it [ o x ]
*/
for (; !buf[i].is_zero(*this) && !buf[i].is_deleted(); ++i)
reinsert(buf[i], buf[i].get_hash(*this));

#ifdef DBMS_HASH_MAP_DEBUG_RESIZES
watch.stop();
std::cerr << std::fixed << std::setprecision(3) << "Resize from " << old_size << " to "
<< grower.buf_size() << " took " << watch.elapsedSeconds() << " sec."
<< std::endl;
#endif
}

class iterator : public iterator_base<iterator, false> {
public:
using iterator_base<iterator, false>::iterator_base;
Expand Down
3 changes: 3 additions & 0 deletions be/src/vec/exec/join/vhash_join_node.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,9 @@ struct ProcessHashTableBuild {
_join_node->_mem_used += bucket_bytes - old_bucket_bytes;
COUNTER_SET(_join_node->_build_buckets_counter, bucket_size);
}};
//resize ahead is good for keys whose values are almost unique.
//we only resize for the first collected block(<=4G)
hash_table_ctx.hash_table.resize(_rows);

KeyGetter key_getter(_build_raw_ptrs, _join_node->_build_key_sz, nullptr);

Expand Down