From a2fa1643498bcf55795989ee4ebde22cbb10aeda Mon Sep 17 00:00:00 2001 From: Alexander Balabin Date: Wed, 29 Apr 2026 17:16:53 +0100 Subject: [PATCH] fix expression vocab memory management and incorrect reallocation of aggregates storage fix and improve the sys_info example Signed-off-by: Alexander Balabin --- examples/blocks/src/dataset/index.html | 55 +++++++++++++++---- .../perspective/src/cpp/expression_vocab.cpp | 17 ++++-- .../cpp/perspective/src/cpp/sparse_tree.cpp | 8 +-- .../include/perspective/expression_vocab.h | 1 + 4 files changed, 57 insertions(+), 24 deletions(-) diff --git a/examples/blocks/src/dataset/index.html b/examples/blocks/src/dataset/index.html index 11c949f92f..135a23d762 100644 --- a/examples/blocks/src/dataset/index.html +++ b/examples/blocks/src/dataset/index.html @@ -63,6 +63,10 @@ Update delay (ms) +
+ Index + +
Float columns @@ -147,15 +151,40 @@ }; const new_row = () => assign((name, x) => cell_args[name](x)); - const gen_data = async () => { + let i = 0; + const gen_tbl = async () => { + const name = i == 0 ? `superstore` : `superstore_${i}`; + const opts = { name: name }; + if (window["index"].value) { + opts.index = window["index"].value; + } + const tbl = await client.table(new_schema(), opts); + + if (i > 0) { + const layout = await psp_workspace.save(); + for (const view of Object.values(layout['viewers'])) { + if (view.table.startsWith('superstore')) { + view.table = name; + } + } + await psp_workspace.restore(layout); + } + + i += 1; + return tbl; + }; + const gen_data = async (state) => { reset_strings_cache(); let nrows = num_rows.value; let rows = []; const batch_size = Math.floor(nrows / num_batches.value); const batch_freq = batch_delay.value; - const tbl = await client.table(new_schema(), { name: "superstore" }); (function batch() { while (nrows > 0) { + const tbl = state.table; + if (state.stop) { + return; + } rows.push(new_row()); nrows--; if (nrows % batch_size === 0) { @@ -166,24 +195,26 @@ } } })(); - - return tbl; }; // GUI - const make_run_click_callback = (state) => async () => { + const make_table = (state) => async () => { state.table?.delete?.({ lazy: true }); - state.table = gen_data(); - // await window.psp_workspace.addTable("superstore", state.table); + state.table = await gen_tbl(); + state.stop = false; + }; + + const make_run_click_callback = (state) => async () => { + if (state.stop) { + await make_table(state)(); + } + gen_data(state); }; const make_del_click_callback = (state) => async () => { if (state.table) { - // await viewer.eject(); - // await window.psp_workspace.removeTable("superstore"); - await state.table.then((x) => x.delete({ lazy: true })); - state.table = undefined; + state.stop = true; } }; @@ -192,7 +223,7 @@ // Main run.addEventListener("click", make_run_click_callback(state)); del.addEventListener("click", make_del_click_callback(state)); - run.dispatchEvent(new Event("click")); + make_table(state)(); const stats_table = await client.table( { diff --git a/rust/perspective-server/cpp/perspective/src/cpp/expression_vocab.cpp b/rust/perspective-server/cpp/perspective/src/cpp/expression_vocab.cpp index ef6530657c..73b6e4208b 100644 --- a/rust/perspective-server/cpp/perspective/src/cpp/expression_vocab.cpp +++ b/rust/perspective-server/cpp/perspective/src/cpp/expression_vocab.cpp @@ -15,8 +15,9 @@ namespace perspective { t_expression_vocab::t_expression_vocab() { - // Allocate 4096 bytes per page - m_max_vocab_size = 64 * 64; + // Allocate 4096 per page initially but allow to grow to 4MB + m_initial_vocab_size = 64 * 64; + m_max_vocab_size = 1024 * 64 * 64; // Always start with one vocab allocate_new_vocab(); @@ -24,14 +25,20 @@ t_expression_vocab::t_expression_vocab() { const char* t_expression_vocab::intern(const char* str) { - std::size_t bytelength = strlen(str); + t_uindex existing_idx; + for (auto& current_vocab : m_vocabs) { + if (current_vocab.string_exists(str, existing_idx)) { + return current_vocab.unintern_c(existing_idx); + } + } + std::size_t bytelength = strlen(str); if (m_current_vocab_size + bytelength + 1 > m_max_vocab_size) { allocate_new_vocab(); } + t_vocab& current_vocab = *m_vocabs.begin(); m_current_vocab_size += bytelength + 1; - t_vocab& current_vocab = m_vocabs[0]; t_uindex interned_idx = current_vocab.get_interned(str); return current_vocab.unintern_c(interned_idx); } @@ -63,7 +70,7 @@ void t_expression_vocab::allocate_new_vocab() { t_vocab vocab; vocab.init(false); - vocab.reserve(m_max_vocab_size, 64); + vocab.reserve(m_initial_vocab_size, 64); m_vocabs.insert(m_vocabs.begin(), std::move(vocab)); m_current_vocab_size = 0; } diff --git a/rust/perspective-server/cpp/perspective/src/cpp/sparse_tree.cpp b/rust/perspective-server/cpp/perspective/src/cpp/sparse_tree.cpp index 8156a515d3..f0202b446c 100644 --- a/rust/perspective-server/cpp/perspective/src/cpp/sparse_tree.cpp +++ b/rust/perspective-server/cpp/perspective/src/cpp/sparse_tree.cpp @@ -877,12 +877,6 @@ t_stree::update_shape_from_static(const t_dtree_ctx& ctx) { if (iter == m_nodes->get().end()) { // create node and enqueue sptidx = genidx(); - t_uindex aggsize = m_aggregates->size(); - if (sptidx == aggsize) { - double scale = 1.3; - t_uindex new_size = scale * aggsize; - m_aggregates->extend(new_size); - } t_uindex dst_ridx = gen_aggidx(); @@ -1105,7 +1099,7 @@ t_stree::gen_aggidx() { t_uindex rval = m_cur_aggidx; ++m_cur_aggidx; if (rval >= cur_cap) { - double nrows = ceil(.3 * double(rval)); + double nrows = ceil(1.3 * double(rval)); m_aggregates->extend(static_cast(nrows)); } diff --git a/rust/perspective-server/cpp/perspective/src/include/perspective/expression_vocab.h b/rust/perspective-server/cpp/perspective/src/include/perspective/expression_vocab.h index 15f0f42036..2759bb11d3 100644 --- a/rust/perspective-server/cpp/perspective/src/include/perspective/expression_vocab.h +++ b/rust/perspective-server/cpp/perspective/src/include/perspective/expression_vocab.h @@ -59,6 +59,7 @@ class PERSPECTIVE_EXPORT t_expression_vocab { // EXPRESSION_VOCAB_CAPACITY * 64 bytes, allocate a new page. // TODO: this leaves edge cases where we allocate new pages too eagerly, // or we aren't using the allocated space as efficiently as possible. + std::size_t m_initial_vocab_size; std::size_t m_max_vocab_size; std::size_t m_current_vocab_size;