From b815bea01f5431933fa41b30825a85137d6bcc6d Mon Sep 17 00:00:00 2001 From: Vincent Weevers Date: Sun, 12 Sep 2021 21:23:55 +0200 Subject: [PATCH 1/5] Make `db.clear()` 27x faster by doing it natively Because this uses an iterator under the hood, it also refactors shared code between `db.iterator()` and `db.clear()`. --- binding.cc | 465 +++++++++++++++++++++++++++--------------- leveldown.js | 4 + test/iterator-test.js | 22 ++ 3 files changed, 321 insertions(+), 170 deletions(-) diff --git a/binding.cc b/binding.cc index 858ef037..cc6e57cb 100644 --- a/binding.cc +++ b/binding.cc @@ -156,7 +156,7 @@ static uint32_t Uint32Property (napi_env env, napi_value obj, const char* key, } /** - * Returns a uint32 property 'key' from 'obj'. + * Returns a int32 property 'key' from 'obj'. * Returns 'DEFAULT' if the property doesn't exist. */ static int Int32Property (napi_env env, napi_value obj, const char* key, @@ -291,11 +291,13 @@ struct BaseWorker { self->DoExecute(); } - void SetStatus (leveldb::Status status) { + bool SetStatus (leveldb::Status status) { status_ = status; if (!status.ok()) { SetErrorMessage(status.ToString().c_str()); + return false; } + return true; } void SetErrorMessage(const char *msg) { @@ -487,49 +489,35 @@ struct PriorityWorker : public BaseWorker { /** * Owns a leveldb iterator. */ -struct Iterator { - Iterator (Database* database, - uint32_t id, - bool reverse, - bool keys, - bool values, - int limit, - std::string* lt, - std::string* lte, - std::string* gt, - std::string* gte, - bool fillCache, - bool keyAsBuffer, - bool valueAsBuffer, - uint32_t highWaterMark) +struct BaseIterator { + BaseIterator(Database* database, + bool reverse, + std::string* lt, + std::string* lte, + std::string* gt, + std::string* gte, + int limit, + bool fillCache) : database_(database), - id_(id), + isEnding_(false), + hasEnded_(false), + didSeek_(false), reverse_(reverse), - keys_(keys), - values_(values), - limit_(limit), lt_(lt), lte_(lte), gt_(gt), gte_(gte), - keyAsBuffer_(keyAsBuffer), - valueAsBuffer_(valueAsBuffer), - highWaterMark_(highWaterMark), - dbIterator_(NULL), + limit_(limit), count_(0), - seeking_(false), - landed_(false), - nexting_(false), - ended_(false), - endWorker_(NULL), - ref_(NULL) { + eof_(false) { options_ = new leveldb::ReadOptions(); options_->fill_cache = fillCache; options_->snapshot = database->NewSnapshot(); + dbIterator_ = database_->NewIterator(options_); } - ~Iterator () { - assert(ended_); + ~BaseIterator () { + assert(hasEnded_); if (lt_ != NULL) delete lt_; if (gt_ != NULL) delete gt_; @@ -539,39 +527,15 @@ struct Iterator { delete options_; } - void Attach (napi_ref ref) { - ref_ = ref; - database_->AttachIterator(id_, this); + bool DidSeek () { + return didSeek_; } - napi_ref Detach () { - database_->DetachIterator(id_); - return ref_; - } - - leveldb::Status IteratorStatus () { - return dbIterator_->status(); - } - - void IteratorEnd () { - delete dbIterator_; - dbIterator_ = NULL; - database_->ReleaseSnapshot(options_->snapshot); - } - - void CheckEndCallback () { - nexting_ = false; - - if (endWorker_ != NULL) { - endWorker_->Queue(); - endWorker_ = NULL; - } - } - - bool GetIterator () { - if (dbIterator_ != NULL) return false; - - dbIterator_ = database_->NewIterator(options_); + /** + * Seek to the first relevant key based on range options. + */ + void SeekToRange () { + didSeek_ = true; if (!reverse_ && gte_ != NULL) { dbIterator_->Seek(*gte_); @@ -602,44 +566,91 @@ struct Iterator { } else { dbIterator_->SeekToFirst(); } - - return true; } - bool Read (std::string& key, std::string& value) { - if (!GetIterator() && !seeking_) { + /** + * Seek manually (during iteration). + */ + void Seek (leveldb::Slice& target) { + didSeek_ = true; + + if (OutOfRange(target)) { if (reverse_) { + dbIterator_->SeekToFirst(); dbIterator_->Prev(); - } - else { + } else { + dbIterator_->SeekToLast(); dbIterator_->Next(); } + + return; } - seeking_ = false; + dbIterator_->Seek(target); if (dbIterator_->Valid()) { - std::string keyStr = dbIterator_->key().ToString(); - - if ((limit_ < 0 || ++count_ <= limit_) - && ( lt_ != NULL ? (lt_->compare(keyStr) > 0) - : lte_ != NULL ? (lte_->compare(keyStr) >= 0) - : true ) - && ( gt_ != NULL ? (gt_->compare(keyStr) < 0) - : gte_ != NULL ? (gte_->compare(keyStr) <= 0) - : true ) - ) { - if (keys_) { - key.assign(dbIterator_->key().data(), dbIterator_->key().size()); - } - if (values_) { - value.assign(dbIterator_->value().data(), dbIterator_->value().size()); + int cmp = dbIterator_->key().compare(target); + if (cmp > 0 && reverse_) { + dbIterator_->Prev(); + } else if (cmp < 0 && !reverse_) { + dbIterator_->Next(); + } + } else { + if (reverse_) { + dbIterator_->SeekToLast(); + } else { + dbIterator_->SeekToFirst(); + } + if (dbIterator_->Valid()) { + int cmp = dbIterator_->key().compare(target); + if (cmp > 0 && reverse_) { + dbIterator_->SeekToFirst(); + dbIterator_->Prev(); + } else if (cmp < 0 && !reverse_) { + dbIterator_->SeekToLast(); + dbIterator_->Next(); } - return true; } } + } - return false; + void End () { + if (!hasEnded_) { + hasEnded_ = true; + delete dbIterator_; + dbIterator_ = NULL; + database_->ReleaseSnapshot(options_->snapshot); + } + } + + bool ReadOne () { + if (eof_ || !dbIterator_->Valid()) { + return false; + } + + if ((limit_ >= 0 && ++count_ > limit_) || OutOfRange(dbIterator_->key())) { + eof_ = true; + return false; + } + + return true; + } + + void Advance () { + if (reverse_) dbIterator_->Prev(); + else dbIterator_->Next(); + } + + leveldb::Slice CurrentKey () { + return dbIterator_->key(); + } + + leveldb::Slice CurrentValue () { + return dbIterator_->value(); + } + + leveldb::Status Status () { + return dbIterator_->status(); } bool OutOfRange (leveldb::Slice& target) { @@ -649,55 +660,119 @@ struct Iterator { (gte_ != NULL && target.compare(*gte_) < 0)); } - bool IteratorNext (std::vector >& result) { - size_t size = 0; - uint32_t cacheSize = 0; + Database* database_; + bool isEnding_; + bool hasEnded_; - while (true) { +private: + leveldb::Iterator* dbIterator_; + bool didSeek_; + bool reverse_; + std::string* lt_; + std::string* lte_; + std::string* gt_; + std::string* gte_; + int limit_; + int count_; + bool eof_; + leveldb::ReadOptions* options_; +}; + +/** + * Extends BaseIterator for reading it from JS land. + */ +struct Iterator final : public BaseIterator { + Iterator (Database* database, + uint32_t id, + bool reverse, + bool keys, + bool values, + int limit, + std::string* lt, + std::string* lte, + std::string* gt, + std::string* gte, + bool fillCache, + bool keyAsBuffer, + bool valueAsBuffer, + uint32_t highWaterMark) + : BaseIterator(database, reverse, lt, lte, gt, gte, limit, fillCache), + id_(id), + keys_(keys), + values_(values), + keyAsBuffer_(keyAsBuffer), + valueAsBuffer_(valueAsBuffer), + highWaterMark_(highWaterMark), + landed_(false), + nexting_(false), + endWorker_(NULL), + ref_(NULL) { + } + + ~Iterator () {} + + void Attach (napi_ref ref) { + ref_ = ref; + database_->AttachIterator(id_, this); + } + + napi_ref Detach () { + database_->DetachIterator(id_); + return ref_; + } + + void CheckEndCallback () { + nexting_ = false; + + if (endWorker_ != NULL) { + endWorker_->Queue(); + endWorker_ = NULL; + } + } + + bool ReadMany (uint32_t size, std::vector>& result) { + size_t bytesRead = 0; + + while (ReadOne()) { std::string key, value; - bool ok = Read(key, value); - if (ok) { - result.push_back(std::make_pair(key, value)); + if (keys_) { + leveldb::Slice slice = CurrentKey(); + key.assign(slice.data(), slice.size()); + bytesRead += key.size(); + } - if (!landed_) { - landed_ = true; - return true; - } + if (values_) { + leveldb::Slice slice = CurrentValue(); + value.assign(slice.data(), slice.size()); + bytesRead += value.size(); + } - size = size + key.size() + value.size(); - if (size > highWaterMark_) return true; + Advance(); + result.push_back(std::make_pair(key, value)); - // Limit the size of the cache to prevent starving the event loop - // in JS-land while we're recursively calling process.nextTick(). - if (++cacheSize >= 1000) return true; - } else { - return false; + if (!landed_) { + landed_ = true; + return true; + } + + if (bytesRead > highWaterMark_ || result.size() >= size) { + return true; } } + + return false; } - Database* database_; uint32_t id_; - bool reverse_; bool keys_; bool values_; - int limit_; - std::string* lt_; - std::string* lte_; - std::string* gt_; - std::string* gte_; bool keyAsBuffer_; bool valueAsBuffer_; uint32_t highWaterMark_; - leveldb::Iterator* dbIterator_; - int count_; - bool seeking_; bool landed_; bool nexting_; - bool ended_; - leveldb::ReadOptions* options_; BaseWorker* endWorker_; private: @@ -723,12 +798,7 @@ static void env_cleanup_hook (void* arg) { std::map::iterator it; for (it = iterators.begin(); it != iterators.end(); ++it) { - Iterator* iterator = it->second; - - if (!iterator->ended_) { - iterator->ended_ = true; - iterator->IteratorEnd(); - } + it->second->End(); } // Having ended the iterators (and released snapshots) we can safely close. @@ -1043,6 +1113,91 @@ NAPI_METHOD(db_del) { NAPI_RETURN_UNDEFINED(); } +/** + * Worker class for deleting a range from a database. + */ +struct ClearWorker final : public PriorityWorker { + ClearWorker (napi_env env, + Database* database, + napi_value callback, + bool reverse, + int limit, + std::string* lt, + std::string* lte, + std::string* gt, + std::string* gte) + : PriorityWorker(env, database, callback, "leveldown.db.clear") { + baseIterator_ = new BaseIterator(database, reverse, lt, lte, gt, gte, limit, false); + writeOptions_ = new leveldb::WriteOptions(); + writeOptions_->sync = false; + } + + ~ClearWorker () { + // TODO: write GC tests + delete baseIterator_; + delete writeOptions_; + } + + void DoExecute () override { + baseIterator_->SeekToRange(); + + // TODO: add option + uint32_t hwm = 16 * 1024; + leveldb::WriteBatch batch; + + while (true) { + size_t bytesRead = 0; + + while (bytesRead < hwm && baseIterator_->ReadOne()) { + leveldb::Slice key = baseIterator_->CurrentKey(); + batch.Delete(key); + bytesRead += key.size(); + baseIterator_->Advance(); + } + + if (!SetStatus(baseIterator_->Status()) || bytesRead == 0) { + break; + } + + if (!SetStatus(database_->WriteBatch(*writeOptions_, &batch))) { + break; + } + + batch.Clear(); + } + + baseIterator_->End(); + } + +private: + BaseIterator* baseIterator_; + leveldb::WriteOptions* writeOptions_; +}; + +/** + * Delete a range from a database. + */ +NAPI_METHOD(db_clear) { + NAPI_ARGV(3); + NAPI_DB_CONTEXT(); + + napi_value options = argv[1]; + napi_value callback = argv[2]; + + bool reverse = BooleanProperty(env, options, "reverse", false); + int limit = Int32Property(env, options, "limit", -1); + + std::string* lt = RangeOption(env, options, "lt"); + std::string* lte = RangeOption(env, options, "lte"); + std::string* gt = RangeOption(env, options, "gt"); + std::string* gte = RangeOption(env, options, "gte"); + + ClearWorker* worker = new ClearWorker(env, database, callback, reverse, limit, lt, lte, gt, gte); + worker->Queue(); + + NAPI_RETURN_UNDEFINED(); +} + /** * Worker class for calculating the size of a range. */ @@ -1292,51 +1447,13 @@ NAPI_METHOD(iterator_seek) { NAPI_ARGV(2); NAPI_ITERATOR_CONTEXT(); - if (iterator->ended_) { + if (iterator->isEnding_ || iterator->hasEnded_) { napi_throw_error(env, NULL, "iterator has ended"); } leveldb::Slice target = ToSlice(env, argv[1]); - iterator->GetIterator(); - - leveldb::Iterator* dbIterator = iterator->dbIterator_; - dbIterator->Seek(target); - - iterator->seeking_ = true; iterator->landed_ = false; - - if (iterator->OutOfRange(target)) { - if (iterator->reverse_) { - dbIterator->SeekToFirst(); - dbIterator->Prev(); - } else { - dbIterator->SeekToLast(); - dbIterator->Next(); - } - } else if (dbIterator->Valid()) { - int cmp = dbIterator->key().compare(target); - if (cmp > 0 && iterator->reverse_) { - dbIterator->Prev(); - } else if (cmp < 0 && !iterator->reverse_) { - dbIterator->Next(); - } - } else { - if (iterator->reverse_) { - dbIterator->SeekToLast(); - } else { - dbIterator->SeekToFirst(); - } - if (dbIterator->Valid()) { - int cmp = dbIterator->key().compare(target); - if (cmp > 0 && iterator->reverse_) { - dbIterator->SeekToFirst(); - dbIterator->Prev(); - } else if (cmp < 0 && !iterator->reverse_) { - dbIterator->SeekToLast(); - dbIterator->Next(); - } - } - } + iterator->Seek(target); DisposeSliceBuffer(target); NAPI_RETURN_UNDEFINED(); @@ -1355,7 +1472,7 @@ struct EndWorker final : public BaseWorker { ~EndWorker () {} void DoExecute () override { - iterator_->IteratorEnd(); + iterator_->End(); } void HandleOKCallback () override { @@ -1371,9 +1488,9 @@ struct EndWorker final : public BaseWorker { * open iterators during NAPI_METHOD(db_close). */ static void iterator_end_do (napi_env env, Iterator* iterator, napi_value cb) { - if (!iterator->ended_) { + if (!iterator->isEnding_ && !iterator->hasEnded_) { EndWorker* worker = new EndWorker(env, iterator, cb); - iterator->ended_ = true; + iterator->isEnding_ = true; if (iterator->nexting_) { iterator->endWorker_ = worker; @@ -1409,9 +1526,16 @@ struct NextWorker final : public BaseWorker { ~NextWorker () {} void DoExecute () override { - ok_ = iterator_->IteratorNext(result_); + if (!iterator_->DidSeek()) { + iterator_->SeekToRange(); + } + + // Limit the size of the cache to prevent starving the event loop + // in JS-land while we're recursively calling process.nextTick(). + ok_ = iterator_->ReadMany(1000, result_); + if (!ok_) { - SetStatus(iterator_->IteratorStatus()); + SetStatus(iterator_->Status()); } } @@ -1470,7 +1594,7 @@ NAPI_METHOD(iterator_next) { napi_value callback = argv[1]; - if (iterator->ended_) { + if (iterator->isEnding_ || iterator->hasEnded_) { napi_value argv = CreateError(env, "iterator has ended"); CallFunction(env, callback, 1, &argv); @@ -1734,6 +1858,7 @@ NAPI_INIT() { NAPI_EXPORT_FUNCTION(db_put); NAPI_EXPORT_FUNCTION(db_get); NAPI_EXPORT_FUNCTION(db_del); + NAPI_EXPORT_FUNCTION(db_clear); NAPI_EXPORT_FUNCTION(db_approximate_size); NAPI_EXPORT_FUNCTION(db_compact_range); NAPI_EXPORT_FUNCTION(db_get_property); diff --git a/leveldown.js b/leveldown.js index 67f993e5..226a518c 100644 --- a/leveldown.js +++ b/leveldown.js @@ -63,6 +63,10 @@ LevelDOWN.prototype._del = function (key, options, callback) { binding.db_del(this.context, key, options, callback) } +LevelDOWN.prototype._clear = function (options, callback) { + binding.db_clear(this.context, options, callback) +} + LevelDOWN.prototype._chainedBatch = function () { return new ChainedBatch(this) } diff --git a/test/iterator-test.js b/test/iterator-test.js index 760e7360..413430a1 100644 --- a/test/iterator-test.js +++ b/test/iterator-test.js @@ -88,3 +88,25 @@ make('close db with open iterator', function (db, t, done) { done(null, false) }) }) + +make('key-only iterator', function (db, t, done) { + const it = db.iterator({ values: false, keyAsBuffer: false, valueAsBuffer: false }) + + it.next(function (err, key, value) { + t.ifError(err, 'no next() error') + t.is(key, 'one') + t.is(value, '') // should this be undefined? + it.end(done) + }) +}) + +make('value-only iterator', function (db, t, done) { + const it = db.iterator({ keys: false, keyAsBuffer: false, valueAsBuffer: false }) + + it.next(function (err, key, value) { + t.ifError(err, 'no next() error') + t.is(key, '') // should this be undefined? + t.is(value, '1') + it.end(done) + }) +}) From 29581291dd942cf974d33ba055614f5ff36805b0 Mon Sep 17 00:00:00 2001 From: Vincent Weevers Date: Sun, 19 Sep 2021 01:26:19 +0200 Subject: [PATCH 2/5] Prevent GC of db during `clear()` and other operations --- .github/workflows/test.yml | 2 ++ binding.cc | 19 ++++++++++++--- test/clear-gc-test.js | 47 ++++++++++++++++++++++++++++++++++++++ test/gc.js | 1 + 4 files changed, 66 insertions(+), 3 deletions(-) create mode 100644 test/clear-gc-test.js diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 444f20f2..15227ae9 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -39,3 +39,5 @@ jobs: uses: GabrielBB/xvfb-action@v1 with: run: npm run test-electron + - name: Test GC + run: npm run test-gc diff --git a/binding.cc b/binding.cc index cc6e57cb..94a4c1d3 100644 --- a/binding.cc +++ b/binding.cc @@ -361,9 +361,14 @@ struct Database { filterPolicy_(leveldb::NewBloomFilterPolicy(10)), currentIteratorId_(0), pendingCloseWorker_(NULL), + ref_(NULL), priorityWork_(0) {} ~Database () { + if (ref_ != NULL) { + napi_delete_reference(env_, ref_); + } + if (db_ != NULL) { delete db_; db_ = NULL; @@ -444,11 +449,13 @@ struct Database { } void IncrementPriorityWork () { - ++priorityWork_; + napi_reference_ref(env_, ref_, &priorityWork_); } void DecrementPriorityWork () { - if (--priorityWork_ == 0 && pendingCloseWorker_ != NULL) { + napi_reference_unref(env_, ref_, &priorityWork_); + + if (priorityWork_ == 0 && pendingCloseWorker_ != NULL) { pendingCloseWorker_->Queue(); pendingCloseWorker_ = NULL; } @@ -465,6 +472,7 @@ struct Database { uint32_t currentIteratorId_; BaseWorker *pendingCloseWorker_; std::map< uint32_t, Iterator * > iterators_; + napi_ref ref_; private: uint32_t priorityWork_; @@ -828,11 +836,16 @@ NAPI_METHOD(db_init) { NAPI_STATUS_THROWS(napi_create_external(env, database, FinalizeDatabase, NULL, &result)); + + // Reference counter to prevent GC of database while priority workers are active + NAPI_STATUS_THROWS(napi_create_reference(env, result, 0, &database->ref_)); + return result; } /** * Worker class for opening a database. + * TODO: shouldn't this be a PriorityWorker? */ struct OpenWorker final : public BaseWorker { OpenWorker (napi_env env, @@ -1133,7 +1146,6 @@ struct ClearWorker final : public PriorityWorker { } ~ClearWorker () { - // TODO: write GC tests delete baseIterator_; delete writeOptions_; } @@ -1476,6 +1488,7 @@ struct EndWorker final : public BaseWorker { } void HandleOKCallback () override { + // TODO: if we don't use EndWorker, do we still delete the reference? napi_delete_reference(env_, iterator_->Detach()); BaseWorker::HandleOKCallback(); } diff --git a/test/clear-gc-test.js b/test/clear-gc-test.js new file mode 100644 index 00000000..2794878a --- /dev/null +++ b/test/clear-gc-test.js @@ -0,0 +1,47 @@ +'use strict' + +const test = require('tape') +const testCommon = require('./common') +const sourceData = [] + +for (let i = 0; i < 1e3; i++) { + sourceData.push({ + type: 'put', + key: i.toString(), + value: Math.random().toString() + }) +} + +test('db without ref does not get GCed while clear() is in progress', function (t) { + t.plan(4) + + let db = testCommon.factory() + + db.open(function (err) { + t.ifError(err, 'no open error') + + // Insert test data + db.batch(sourceData.slice(), function (err) { + t.ifError(err, 'no batch error') + + // Start async work + db.clear(function () { + t.pass('got callback') + + // Give GC another chance to run, to rule out other issues. + setImmediate(function () { + if (global.gc) global.gc() + t.pass() + }) + }) + + // Remove reference. The db should not get garbage collected + // until after the clear() callback, thanks to a napi_ref. + db = null + + // Useful for manual testing with "node --expose-gc". + // The pending tap assertion may also allow GC to kick in. + if (global.gc) global.gc() + }) + }) +}) diff --git a/test/gc.js b/test/gc.js index a5655f91..956cc2ad 100644 --- a/test/gc.js +++ b/test/gc.js @@ -10,3 +10,4 @@ if (!global.gc) { require('./cleanup-hanging-iterators-test') require('./iterator-gc-test') require('./chained-batch-gc-test') +require('./clear-gc-test') From d65809987a468d253ed3124941fc9f45e18a00d5 Mon Sep 17 00:00:00 2001 From: Vincent Weevers Date: Mon, 20 Sep 2021 10:35:33 +0200 Subject: [PATCH 3/5] Refactor: avoid storing `napi_env` --- binding.cc | 217 +++++++++++++++++++++++++++-------------------------- 1 file changed, 110 insertions(+), 107 deletions(-) diff --git a/binding.cc b/binding.cc index 94a4c1d3..44b197e7 100644 --- a/binding.cc +++ b/binding.cc @@ -263,17 +263,18 @@ static napi_status CallFunction (napi_env env, * - DoFinally (main thread): do cleanup regardless of success */ struct BaseWorker { + // Note: storing env is discouraged as we'd end up using it in unsafe places. BaseWorker (napi_env env, Database* database, napi_value callback, const char* resourceName) - : env_(env), database_(database), errMsg_(NULL) { - NAPI_STATUS_THROWS_VOID(napi_create_reference(env_, callback, 1, &callbackRef_)); + : database_(database), errMsg_(NULL) { + NAPI_STATUS_THROWS_VOID(napi_create_reference(env, callback, 1, &callbackRef_)); napi_value asyncResourceName; - NAPI_STATUS_THROWS_VOID(napi_create_string_utf8(env_, resourceName, + NAPI_STATUS_THROWS_VOID(napi_create_string_utf8(env, resourceName, NAPI_AUTO_LENGTH, &asyncResourceName)); - NAPI_STATUS_THROWS_VOID(napi_create_async_work(env_, callback, + NAPI_STATUS_THROWS_VOID(napi_create_async_work(env, callback, asyncResourceName, BaseWorker::Execute, BaseWorker::Complete, @@ -282,12 +283,13 @@ struct BaseWorker { virtual ~BaseWorker () { delete [] errMsg_; - napi_delete_reference(env_, callbackRef_); - napi_delete_async_work(env_, asyncWork_); } static void Execute (napi_env env, void* data) { BaseWorker* self = (BaseWorker*)data; + + // Don't pass env to DoExecute() because use of Node-API + // methods should generally be avoided in async work. self->DoExecute(); } @@ -308,39 +310,43 @@ struct BaseWorker { } virtual void DoExecute () = 0; - virtual void DoFinally () {}; + virtual void DoFinally (napi_env env) {}; static void Complete (napi_env env, napi_status status, void* data) { BaseWorker* self = (BaseWorker*)data; - self->DoComplete(); - self->DoFinally(); + + self->DoComplete(env); + self->DoFinally(env); + + napi_delete_reference(env, self->callbackRef_); + napi_delete_async_work(env, self->asyncWork_); + delete self; } - void DoComplete () { + void DoComplete (napi_env env) { if (status_.ok()) { - return HandleOKCallback(); + return HandleOKCallback(env); } - napi_value argv = CreateError(env_, errMsg_); + napi_value argv = CreateError(env, errMsg_); napi_value callback; - napi_get_reference_value(env_, callbackRef_, &callback); - CallFunction(env_, callback, 1, &argv); + napi_get_reference_value(env, callbackRef_, &callback); + CallFunction(env, callback, 1, &argv); } - virtual void HandleOKCallback () { + virtual void HandleOKCallback (napi_env env) { napi_value argv; - napi_get_null(env_, &argv); + napi_get_null(env, &argv); napi_value callback; - napi_get_reference_value(env_, callbackRef_, &callback); - CallFunction(env_, callback, 1, &argv); + napi_get_reference_value(env, callbackRef_, &callback); + CallFunction(env, callback, 1, &argv); } - void Queue () { - napi_queue_async_work(env_, asyncWork_); + void Queue (napi_env env) { + napi_queue_async_work(env, asyncWork_); } - napi_env env_; napi_ref callbackRef_; napi_async_work asyncWork_; Database* database_; @@ -354,9 +360,8 @@ struct BaseWorker { * Owns the LevelDB storage, cache, filter policy and iterators. */ struct Database { - Database (napi_env env) - : env_(env), - db_(NULL), + Database () + : db_(NULL), blockCache_(NULL), filterPolicy_(leveldb::NewBloomFilterPolicy(10)), currentIteratorId_(0), @@ -365,10 +370,6 @@ struct Database { priorityWork_(0) {} ~Database () { - if (ref_ != NULL) { - napi_delete_reference(env_, ref_); - } - if (db_ != NULL) { delete db_; db_ = NULL; @@ -438,25 +439,25 @@ struct Database { return db_->ReleaseSnapshot(snapshot); } - void AttachIterator (uint32_t id, Iterator* iterator) { + void AttachIterator (napi_env env, uint32_t id, Iterator* iterator) { iterators_[id] = iterator; - IncrementPriorityWork(); + IncrementPriorityWork(env); } - void DetachIterator (uint32_t id) { + void DetachIterator (napi_env env, uint32_t id) { iterators_.erase(id); - DecrementPriorityWork(); + DecrementPriorityWork(env); } - void IncrementPriorityWork () { - napi_reference_ref(env_, ref_, &priorityWork_); + void IncrementPriorityWork (napi_env env) { + napi_reference_ref(env, ref_, &priorityWork_); } - void DecrementPriorityWork () { - napi_reference_unref(env_, ref_, &priorityWork_); + void DecrementPriorityWork (napi_env env) { + napi_reference_unref(env, ref_, &priorityWork_); if (priorityWork_ == 0 && pendingCloseWorker_ != NULL) { - pendingCloseWorker_->Queue(); + pendingCloseWorker_->Queue(env); pendingCloseWorker_ = NULL; } } @@ -465,7 +466,6 @@ struct Database { return priorityWork_ > 0; } - napi_env env_; leveldb::DB* db_; leveldb::Cache* blockCache_; const leveldb::FilterPolicy* filterPolicy_; @@ -484,13 +484,13 @@ struct Database { struct PriorityWorker : public BaseWorker { PriorityWorker (napi_env env, Database* database, napi_value callback, const char* resourceName) : BaseWorker(env, database, callback, resourceName) { - database_->IncrementPriorityWork(); + database_->IncrementPriorityWork(env); } - ~PriorityWorker () {} + virtual ~PriorityWorker () {} - void DoFinally () override { - database_->DecrementPriorityWork(); + void DoFinally (napi_env env) override { + database_->DecrementPriorityWork(env); } }; @@ -524,7 +524,7 @@ struct BaseIterator { dbIterator_ = database_->NewIterator(options_); } - ~BaseIterator () { + virtual ~BaseIterator () { assert(hasEnded_); if (lt_ != NULL) delete lt_; @@ -535,7 +535,7 @@ struct BaseIterator { delete options_; } - bool DidSeek () { + bool DidSeek () const { return didSeek_; } @@ -649,19 +649,19 @@ struct BaseIterator { else dbIterator_->Next(); } - leveldb::Slice CurrentKey () { + leveldb::Slice CurrentKey () const { return dbIterator_->key(); } - leveldb::Slice CurrentValue () { + leveldb::Slice CurrentValue () const { return dbIterator_->value(); } - leveldb::Status Status () { + leveldb::Status Status () const { return dbIterator_->status(); } - bool OutOfRange (leveldb::Slice& target) { + bool OutOfRange (const leveldb::Slice& target) { return ((lt_ != NULL && target.compare(*lt_) >= 0) || (lte_ != NULL && target.compare(*lte_) > 0) || (gt_ != NULL && target.compare(*gt_) <= 0) || @@ -719,21 +719,21 @@ struct Iterator final : public BaseIterator { ~Iterator () {} - void Attach (napi_ref ref) { - ref_ = ref; - database_->AttachIterator(id_, this); + void Attach (napi_env env, napi_value context) { + napi_create_reference(env, context, 1, &ref_); + database_->AttachIterator(env, id_, this); } - napi_ref Detach () { - database_->DetachIterator(id_); - return ref_; + void Detach (napi_env env) { + database_->DetachIterator(env, id_); + if (ref_ != NULL) napi_delete_reference(env, ref_); } - void CheckEndCallback () { + void CheckEndCallback (napi_env env) { nexting_ = false; if (endWorker_ != NULL) { - endWorker_->Queue(); + endWorker_->Queue(env); endWorker_ = NULL; } } @@ -780,7 +780,6 @@ struct Iterator final : public BaseIterator { uint32_t highWaterMark_; bool landed_; bool nexting_; - BaseWorker* endWorker_; private: @@ -805,6 +804,7 @@ static void env_cleanup_hook (void* arg) { std::map iterators = database->iterators_; std::map::iterator it; + // TODO: does not do `napi_delete_reference(env, iterator->ref_)`. Problem? for (it = iterators.begin(); it != iterators.end(); ++it) { it->second->End(); } @@ -821,6 +821,7 @@ static void FinalizeDatabase (napi_env env, void* data, void* hint) { if (data) { Database* database = (Database*)data; napi_remove_env_cleanup_hook(env, env_cleanup_hook, database); + if (database->ref_ != NULL) napi_delete_reference(env, database->ref_); delete database; } } @@ -829,7 +830,7 @@ static void FinalizeDatabase (napi_env env, void* data, void* hint) { * Returns a context object for a database. */ NAPI_METHOD(db_init) { - Database* database = new Database(env); + Database* database = new Database(); napi_add_env_cleanup_hook(env, env_cleanup_hook, database); napi_value result; @@ -915,7 +916,7 @@ NAPI_METHOD(db_open) { compression, writeBufferSize, blockSize, maxOpenFiles, blockRestartInterval, maxFileSize); - worker->Queue(); + worker->Queue(env); delete [] location; NAPI_RETURN_UNDEFINED(); @@ -952,7 +953,7 @@ NAPI_METHOD(db_close) { CloseWorker* worker = new CloseWorker(env, database, callback); if (!database->HasPriorityWork()) { - worker->Queue(); + worker->Queue(env); NAPI_RETURN_UNDEFINED(); } @@ -1013,7 +1014,7 @@ NAPI_METHOD(db_put) { napi_value callback = argv[4]; PutWorker* worker = new PutWorker(env, database, callback, key, value, sync); - worker->Queue(); + worker->Queue(env); NAPI_RETURN_UNDEFINED(); } @@ -1042,19 +1043,19 @@ struct GetWorker final : public PriorityWorker { SetStatus(database_->Get(options_, key_, value_)); } - void HandleOKCallback () override { + void HandleOKCallback (napi_env env) override { napi_value argv[2]; - napi_get_null(env_, &argv[0]); + napi_get_null(env, &argv[0]); if (asBuffer_) { - napi_create_buffer_copy(env_, value_.size(), value_.data(), NULL, &argv[1]); + napi_create_buffer_copy(env, value_.size(), value_.data(), NULL, &argv[1]); } else { - napi_create_string_utf8(env_, value_.data(), value_.size(), &argv[1]); + napi_create_string_utf8(env, value_.data(), value_.size(), &argv[1]); } napi_value callback; - napi_get_reference_value(env_, callbackRef_, &callback); - CallFunction(env_, callback, 2, argv); + napi_get_reference_value(env, callbackRef_, &callback); + CallFunction(env, callback, 2, argv); } leveldb::ReadOptions options_; @@ -1078,7 +1079,7 @@ NAPI_METHOD(db_get) { GetWorker* worker = new GetWorker(env, database, callback, key, asBuffer, fillCache); - worker->Queue(); + worker->Queue(env); NAPI_RETURN_UNDEFINED(); } @@ -1121,7 +1122,7 @@ NAPI_METHOD(db_del) { napi_value callback = argv[3]; DelWorker* worker = new DelWorker(env, database, callback, key, sync); - worker->Queue(); + worker->Queue(env); NAPI_RETURN_UNDEFINED(); } @@ -1205,7 +1206,7 @@ NAPI_METHOD(db_clear) { std::string* gte = RangeOption(env, options, "gte"); ClearWorker* worker = new ClearWorker(env, database, callback, reverse, limit, lt, lte, gt, gte); - worker->Queue(); + worker->Queue(env); NAPI_RETURN_UNDEFINED(); } @@ -1232,13 +1233,13 @@ struct ApproximateSizeWorker final : public PriorityWorker { size_ = database_->ApproximateSize(&range); } - void HandleOKCallback () override { + void HandleOKCallback (napi_env env) override { napi_value argv[2]; - napi_get_null(env_, &argv[0]); - napi_create_int64(env_, (uint64_t)size_, &argv[1]); + napi_get_null(env, &argv[0]); + napi_create_int64(env, (uint64_t)size_, &argv[1]); napi_value callback; - napi_get_reference_value(env_, callbackRef_, &callback); - CallFunction(env_, callback, 2, argv); + napi_get_reference_value(env, callbackRef_, &callback); + CallFunction(env, callback, 2, argv); } leveldb::Slice start_; @@ -1261,7 +1262,7 @@ NAPI_METHOD(db_approximate_size) { ApproximateSizeWorker* worker = new ApproximateSizeWorker(env, database, callback, start, end); - worker->Queue(); + worker->Queue(env); NAPI_RETURN_UNDEFINED(); } @@ -1304,7 +1305,7 @@ NAPI_METHOD(db_compact_range) { CompactRangeWorker* worker = new CompactRangeWorker(env, database, callback, start, end); - worker->Queue(); + worker->Queue(env); NAPI_RETURN_UNDEFINED(); } @@ -1358,7 +1359,7 @@ NAPI_METHOD(destroy_db) { napi_value callback = argv[1]; DestroyWorker* worker = new DestroyWorker(env, location, callback); - worker->Queue(); + worker->Queue(env); delete [] location; @@ -1394,7 +1395,7 @@ NAPI_METHOD(repair_db) { napi_value callback = argv[1]; RepairWorker* worker = new RepairWorker(env, location, callback); - worker->Queue(); + worker->Queue(env); delete [] location; @@ -1438,7 +1439,6 @@ NAPI_METHOD(iterator_init) { values, limit, lt, lte, gt, gte, fillCache, keyAsBuffer, valueAsBuffer, highWaterMark); napi_value result; - napi_ref ref; NAPI_STATUS_THROWS(napi_create_external(env, iterator, FinalizeIterator, @@ -1446,8 +1446,7 @@ NAPI_METHOD(iterator_init) { // Prevent GC of JS object before the iterator is ended (explicitly or on // db close) and keep track of non-ended iterators to end them on db close. - NAPI_STATUS_THROWS(napi_create_reference(env, result, 1, &ref)); - iterator->Attach(ref); + iterator->Attach(env, result); return result; } @@ -1487,10 +1486,10 @@ struct EndWorker final : public BaseWorker { iterator_->End(); } - void HandleOKCallback () override { - // TODO: if we don't use EndWorker, do we still delete the reference? - napi_delete_reference(env_, iterator_->Detach()); - BaseWorker::HandleOKCallback(); + void HandleOKCallback (napi_env env) override { + // TODO: would this be safe(r) to do in DoFinally() i.e. after we call the callback? + iterator_->Detach(env); + BaseWorker::HandleOKCallback(env); } Iterator* iterator_; @@ -1508,7 +1507,7 @@ static void iterator_end_do (napi_env env, Iterator* iterator, napi_value cb) { if (iterator->nexting_) { iterator->endWorker_ = worker; } else { - worker->Queue(); + worker->Queue(env); } } } @@ -1552,10 +1551,10 @@ struct NextWorker final : public BaseWorker { } } - void HandleOKCallback () override { + void HandleOKCallback (napi_env env) override { size_t arraySize = result_.size() * 2; napi_value jsArray; - napi_create_array_with_length(env_, arraySize, &jsArray); + napi_create_array_with_length(env, arraySize, &jsArray); for (size_t idx = 0; idx < result_.size(); ++idx) { std::pair row = result_[idx]; @@ -1564,33 +1563,34 @@ struct NextWorker final : public BaseWorker { napi_value returnKey; if (iterator_->keyAsBuffer_) { - napi_create_buffer_copy(env_, key.size(), key.data(), NULL, &returnKey); + napi_create_buffer_copy(env, key.size(), key.data(), NULL, &returnKey); } else { - napi_create_string_utf8(env_, key.data(), key.size(), &returnKey); + napi_create_string_utf8(env, key.data(), key.size(), &returnKey); } napi_value returnValue; if (iterator_->valueAsBuffer_) { - napi_create_buffer_copy(env_, value.size(), value.data(), NULL, &returnValue); + napi_create_buffer_copy(env, value.size(), value.data(), NULL, &returnValue); } else { - napi_create_string_utf8(env_, value.data(), value.size(), &returnValue); + napi_create_string_utf8(env, value.data(), value.size(), &returnValue); } // put the key & value in a descending order, so that they can be .pop:ed in javascript-land - napi_set_element(env_, jsArray, static_cast(arraySize - idx * 2 - 1), returnKey); - napi_set_element(env_, jsArray, static_cast(arraySize - idx * 2 - 2), returnValue); + napi_set_element(env, jsArray, static_cast(arraySize - idx * 2 - 1), returnKey); + napi_set_element(env, jsArray, static_cast(arraySize - idx * 2 - 2), returnValue); } // clean up & handle the next/end state - iterator_->CheckEndCallback(); + // TODO: always do this, even on error + iterator_->CheckEndCallback(env); napi_value argv[3]; - napi_get_null(env_, &argv[0]); + napi_get_null(env, &argv[0]); argv[1] = jsArray; - napi_get_boolean(env_, !ok_, &argv[2]); + napi_get_boolean(env, !ok_, &argv[2]); napi_value callback; - napi_get_reference_value(env_, callbackRef_, &callback); - CallFunction(env_, callback, 3, argv); + napi_get_reference_value(env, callbackRef_, &callback); + CallFunction(env, callback, 3, argv); } Iterator* iterator_; @@ -1616,7 +1616,7 @@ NAPI_METHOD(iterator_next) { NextWorker* worker = new NextWorker(env, iterator, callback); iterator->nexting_ = true; - worker->Queue(); + worker->Queue(env); NAPI_RETURN_UNDEFINED(); } @@ -1700,7 +1700,7 @@ NAPI_METHOD(batch_do) { } BatchWorker* worker = new BatchWorker(env, database, callback, batch, sync, hasData); - worker->Queue(); + worker->Queue(env); NAPI_RETURN_UNDEFINED(); } @@ -1824,12 +1824,10 @@ struct BatchWriteWorker final : public PriorityWorker { batch_(batch), sync_(sync) { // Prevent GC of batch object before we execute - NAPI_STATUS_THROWS_VOID(napi_create_reference(env_, context, 1, &contextRef_)); + NAPI_STATUS_THROWS_VOID(napi_create_reference(env, context, 1, &contextRef_)); } - ~BatchWriteWorker () { - napi_delete_reference(env_, contextRef_); - } + ~BatchWriteWorker () {} void DoExecute () override { if (batch_->hasData_) { @@ -1837,6 +1835,11 @@ struct BatchWriteWorker final : public PriorityWorker { } } + void DoFinally (napi_env env) override { + napi_delete_reference(env, contextRef_); + PriorityWorker::DoFinally(env); + } + Batch* batch_; bool sync_; @@ -1856,7 +1859,7 @@ NAPI_METHOD(batch_write) { napi_value callback = argv[2]; BatchWriteWorker* worker = new BatchWriteWorker(env, argv[0], batch, callback, sync); - worker->Queue(); + worker->Queue(env); NAPI_RETURN_UNDEFINED(); } From 30cfa75400d4044ada5828abb63efa86c085bf25 Mon Sep 17 00:00:00 2001 From: Vincent Weevers Date: Mon, 20 Sep 2021 14:50:19 +0200 Subject: [PATCH 4/5] Cleanup hanging iterator also when `next()` errored --- binding.cc | 86 +++++++++++++------------- test/cleanup-hanging-iterators-test.js | 24 +++++++ 2 files changed, 66 insertions(+), 44 deletions(-) diff --git a/binding.cc b/binding.cc index 44b197e7..0e530d35 100644 --- a/binding.cc +++ b/binding.cc @@ -260,6 +260,7 @@ static napi_status CallFunction (napi_env env, * * - DoExecute (abstract, worker pool thread): main work * - HandleOKCallback (main thread): call JS callback on success + * - HandleErrorCallback (main thread): call JS callback on error * - DoFinally (main thread): do cleanup regardless of success */ struct BaseWorker { @@ -310,48 +311,52 @@ struct BaseWorker { } virtual void DoExecute () = 0; - virtual void DoFinally (napi_env env) {}; static void Complete (napi_env env, napi_status status, void* data) { BaseWorker* self = (BaseWorker*)data; self->DoComplete(env); self->DoFinally(env); - - napi_delete_reference(env, self->callbackRef_); - napi_delete_async_work(env, self->asyncWork_); - - delete self; } void DoComplete (napi_env env) { - if (status_.ok()) { - return HandleOKCallback(env); - } - - napi_value argv = CreateError(env, errMsg_); napi_value callback; napi_get_reference_value(env, callbackRef_, &callback); - CallFunction(env, callback, 1, &argv); + + if (status_.ok()) { + HandleOKCallback(env, callback); + } else { + HandleErrorCallback(env, callback); + } } - virtual void HandleOKCallback (napi_env env) { + virtual void HandleOKCallback (napi_env env, napi_value callback) { napi_value argv; napi_get_null(env, &argv); - napi_value callback; - napi_get_reference_value(env, callbackRef_, &callback); CallFunction(env, callback, 1, &argv); } + virtual void HandleErrorCallback (napi_env env, napi_value callback) { + napi_value argv = CreateError(env, errMsg_); + CallFunction(env, callback, 1, &argv); + } + + virtual void DoFinally (napi_env env) { + napi_delete_reference(env, callbackRef_); + napi_delete_async_work(env, asyncWork_); + + delete this; + } + void Queue (napi_env env) { napi_queue_async_work(env, asyncWork_); } - napi_ref callbackRef_; - napi_async_work asyncWork_; Database* database_; private: + napi_ref callbackRef_; + napi_async_work asyncWork_; leveldb::Status status_; char *errMsg_; }; @@ -491,6 +496,7 @@ struct PriorityWorker : public BaseWorker { void DoFinally (napi_env env) override { database_->DecrementPriorityWork(env); + BaseWorker::DoFinally(env); } }; @@ -507,7 +513,6 @@ struct BaseIterator { int limit, bool fillCache) : database_(database), - isEnding_(false), hasEnded_(false), didSeek_(false), reverse_(reverse), @@ -669,7 +674,6 @@ struct BaseIterator { } Database* database_; - bool isEnding_; bool hasEnded_; private: @@ -713,6 +717,7 @@ struct Iterator final : public BaseIterator { highWaterMark_(highWaterMark), landed_(false), nexting_(false), + isEnding_(false), endWorker_(NULL), ref_(NULL) { } @@ -729,15 +734,6 @@ struct Iterator final : public BaseIterator { if (ref_ != NULL) napi_delete_reference(env, ref_); } - void CheckEndCallback (napi_env env) { - nexting_ = false; - - if (endWorker_ != NULL) { - endWorker_->Queue(env); - endWorker_ = NULL; - } - } - bool ReadMany (uint32_t size, std::vector>& result) { size_t bytesRead = 0; @@ -780,6 +776,7 @@ struct Iterator final : public BaseIterator { uint32_t highWaterMark_; bool landed_; bool nexting_; + bool isEnding_; BaseWorker* endWorker_; private: @@ -1043,7 +1040,7 @@ struct GetWorker final : public PriorityWorker { SetStatus(database_->Get(options_, key_, value_)); } - void HandleOKCallback (napi_env env) override { + void HandleOKCallback (napi_env env, napi_value callback) override { napi_value argv[2]; napi_get_null(env, &argv[0]); @@ -1053,8 +1050,6 @@ struct GetWorker final : public PriorityWorker { napi_create_string_utf8(env, value_.data(), value_.size(), &argv[1]); } - napi_value callback; - napi_get_reference_value(env, callbackRef_, &callback); CallFunction(env, callback, 2, argv); } @@ -1233,12 +1228,10 @@ struct ApproximateSizeWorker final : public PriorityWorker { size_ = database_->ApproximateSize(&range); } - void HandleOKCallback (napi_env env) override { + void HandleOKCallback (napi_env env, napi_value callback) override { napi_value argv[2]; napi_get_null(env, &argv[0]); napi_create_int64(env, (uint64_t)size_, &argv[1]); - napi_value callback; - napi_get_reference_value(env, callbackRef_, &callback); CallFunction(env, callback, 2, argv); } @@ -1486,10 +1479,9 @@ struct EndWorker final : public BaseWorker { iterator_->End(); } - void HandleOKCallback (napi_env env) override { - // TODO: would this be safe(r) to do in DoFinally() i.e. after we call the callback? + void DoFinally (napi_env env) override { iterator_->Detach(env); - BaseWorker::HandleOKCallback(env); + BaseWorker::DoFinally(env); } Iterator* iterator_; @@ -1551,7 +1543,7 @@ struct NextWorker final : public BaseWorker { } } - void HandleOKCallback (napi_env env) override { + void HandleOKCallback (napi_env env, napi_value callback) override { size_t arraySize = result_.size() * 2; napi_value jsArray; napi_create_array_with_length(env, arraySize, &jsArray); @@ -1580,19 +1572,25 @@ struct NextWorker final : public BaseWorker { napi_set_element(env, jsArray, static_cast(arraySize - idx * 2 - 2), returnValue); } - // clean up & handle the next/end state - // TODO: always do this, even on error - iterator_->CheckEndCallback(env); - napi_value argv[3]; napi_get_null(env, &argv[0]); argv[1] = jsArray; napi_get_boolean(env, !ok_, &argv[2]); - napi_value callback; - napi_get_reference_value(env, callbackRef_, &callback); CallFunction(env, callback, 3, argv); } + void DoFinally (napi_env env) override { + // clean up & handle the next/end state + iterator_->nexting_ = false; + + if (iterator_->endWorker_ != NULL) { + iterator_->endWorker_->Queue(env); + iterator_->endWorker_ = NULL; + } + + BaseWorker::DoFinally(env); + } + Iterator* iterator_; std::vector > result_; bool ok_; diff --git a/test/cleanup-hanging-iterators-test.js b/test/cleanup-hanging-iterators-test.js index 89578a28..535977ee 100644 --- a/test/cleanup-hanging-iterators-test.js +++ b/test/cleanup-hanging-iterators-test.js @@ -92,3 +92,27 @@ makeTest('test ending iterators', function (db, t, done) { done() }) }) + +makeTest('test recursive next', function (db, t, done) { + // Test that we're able to close when user keeps scheduling work + const it = db.iterator({ highWaterMark: 0 }) + + it.next(function loop (err, key) { + if (err && err.message !== 'iterator has ended') throw err + if (key !== undefined) it.next(loop) + }) + + done() +}) + +makeTest('test recursive next (random)', function (db, t, done) { + // Same as the test above but closing at a random time + const it = db.iterator({ highWaterMark: 0 }) + + it.next(function loop (err, key) { + if (err && err.message !== 'iterator has ended') throw err + if (key !== undefined) it.next(loop) + }) + + setTimeout(done, Math.floor(Math.random() * 50)) +}) From 9e146b5fe7e35c491aafea13c3d4344686dfdf53 Mon Sep 17 00:00:00 2001 From: Vincent Weevers Date: Mon, 20 Sep 2021 20:48:25 +0200 Subject: [PATCH 5/5] Optimize `db.iterator()` By using `emplace_back()`, reusing the `std::vector` cache between `iterator.next()` calls, and not advancing the iterator prematurely. That last one only matters for single reads (i.e. the first `next()` call or one made after seeking) and it doesn't improve performance compared to the previous release, just undoes a mistake in b815bea. --- binding.cc | 136 ++++++++++++++++++++++++++--------------------------- 1 file changed, 67 insertions(+), 69 deletions(-) diff --git a/binding.cc b/binding.cc index 0e530d35..2168a97b 100644 --- a/binding.cc +++ b/binding.cc @@ -467,7 +467,7 @@ struct Database { } } - bool HasPriorityWork () { + bool HasPriorityWork () const { return priorityWork_ > 0; } @@ -521,8 +521,7 @@ struct BaseIterator { gt_(gt), gte_(gte), limit_(limit), - count_(0), - eof_(false) { + count_(0) { options_ = new leveldb::ReadOptions(); options_->fill_cache = fillCache; options_->snapshot = database->NewSnapshot(); @@ -588,40 +587,22 @@ struct BaseIterator { didSeek_ = true; if (OutOfRange(target)) { - if (reverse_) { - dbIterator_->SeekToFirst(); - dbIterator_->Prev(); - } else { - dbIterator_->SeekToLast(); - dbIterator_->Next(); - } - - return; + return SeekToEnd(); } dbIterator_->Seek(target); if (dbIterator_->Valid()) { int cmp = dbIterator_->key().compare(target); - if (cmp > 0 && reverse_) { - dbIterator_->Prev(); - } else if (cmp < 0 && !reverse_) { - dbIterator_->Next(); + if (reverse_ ? cmp > 0 : cmp < 0) { + Next(); } } else { - if (reverse_) { - dbIterator_->SeekToLast(); - } else { - dbIterator_->SeekToFirst(); - } + SeekToFirst(); if (dbIterator_->Valid()) { int cmp = dbIterator_->key().compare(target); - if (cmp > 0 && reverse_) { - dbIterator_->SeekToFirst(); - dbIterator_->Prev(); - } else if (cmp < 0 && !reverse_) { - dbIterator_->SeekToLast(); - dbIterator_->Next(); + if (reverse_ ? cmp > 0 : cmp < 0) { + SeekToEnd(); } } } @@ -636,24 +617,34 @@ struct BaseIterator { } } - bool ReadOne () { - if (eof_ || !dbIterator_->Valid()) { - return false; - } - - if ((limit_ >= 0 && ++count_ > limit_) || OutOfRange(dbIterator_->key())) { - eof_ = true; - return false; - } + bool Valid () const { + return dbIterator_->Valid() && !OutOfRange(dbIterator_->key()); + } - return true; + bool Increment () { + return limit_ < 0 || ++count_ <= limit_; } - void Advance () { + void Next () { if (reverse_) dbIterator_->Prev(); else dbIterator_->Next(); } + void SeekToFirst () { + if (reverse_) dbIterator_->SeekToLast(); + else dbIterator_->SeekToFirst(); + } + + void SeekToLast () { + if (reverse_) dbIterator_->SeekToFirst(); + else dbIterator_->SeekToLast(); + } + + void SeekToEnd () { + SeekToLast(); + Next(); + } + leveldb::Slice CurrentKey () const { return dbIterator_->key(); } @@ -666,7 +657,13 @@ struct BaseIterator { return dbIterator_->status(); } - bool OutOfRange (const leveldb::Slice& target) { + bool OutOfRange (const leveldb::Slice& target) const { + // TODO: benchmark to see if this is worth it + // if (upperBoundOnly && !reverse_) { + // return ((lt_ != NULL && target.compare(*lt_) >= 0) || + // (lte_ != NULL && target.compare(*lte_) > 0)); + // } + return ((lt_ != NULL && target.compare(*lt_) >= 0) || (lte_ != NULL && target.compare(*lte_) > 0) || (gt_ != NULL && target.compare(*gt_) <= 0) || @@ -686,7 +683,6 @@ struct BaseIterator { std::string* gte_; int limit_; int count_; - bool eof_; leveldb::ReadOptions* options_; }; @@ -734,33 +730,36 @@ struct Iterator final : public BaseIterator { if (ref_ != NULL) napi_delete_reference(env, ref_); } - bool ReadMany (uint32_t size, std::vector>& result) { + bool ReadMany (uint32_t size) { + cache_.clear(); size_t bytesRead = 0; - while (ReadOne()) { - std::string key, value; + while (true) { + if (landed_) Next(); + if (!Valid() || !Increment()) break; if (keys_) { leveldb::Slice slice = CurrentKey(); - key.assign(slice.data(), slice.size()); - bytesRead += key.size(); + cache_.emplace_back(slice.data(), slice.size()); + bytesRead += slice.size(); + } else { + cache_.emplace_back(""); } if (values_) { leveldb::Slice slice = CurrentValue(); - value.assign(slice.data(), slice.size()); - bytesRead += value.size(); + cache_.emplace_back(slice.data(), slice.size()); + bytesRead += slice.size(); + } else { + cache_.emplace_back(""); } - Advance(); - result.push_back(std::make_pair(key, value)); - if (!landed_) { landed_ = true; return true; } - if (bytesRead > highWaterMark_ || result.size() >= size) { + if (bytesRead > highWaterMark_ || cache_.size() >= size * 2) { return true; } } @@ -778,6 +777,7 @@ struct Iterator final : public BaseIterator { bool nexting_; bool isEnding_; BaseWorker* endWorker_; + std::vector cache_; private: napi_ref ref_; @@ -1136,18 +1136,18 @@ struct ClearWorker final : public PriorityWorker { std::string* gt, std::string* gte) : PriorityWorker(env, database, callback, "leveldown.db.clear") { - baseIterator_ = new BaseIterator(database, reverse, lt, lte, gt, gte, limit, false); + iterator_ = new BaseIterator(database, reverse, lt, lte, gt, gte, limit, false); writeOptions_ = new leveldb::WriteOptions(); writeOptions_->sync = false; } ~ClearWorker () { - delete baseIterator_; + delete iterator_; delete writeOptions_; } void DoExecute () override { - baseIterator_->SeekToRange(); + iterator_->SeekToRange(); // TODO: add option uint32_t hwm = 16 * 1024; @@ -1156,14 +1156,14 @@ struct ClearWorker final : public PriorityWorker { while (true) { size_t bytesRead = 0; - while (bytesRead < hwm && baseIterator_->ReadOne()) { - leveldb::Slice key = baseIterator_->CurrentKey(); + while (bytesRead <= hwm && iterator_->Valid() && iterator_->Increment()) { + leveldb::Slice key = iterator_->CurrentKey(); batch.Delete(key); bytesRead += key.size(); - baseIterator_->Advance(); + iterator_->Next(); } - if (!SetStatus(baseIterator_->Status()) || bytesRead == 0) { + if (!SetStatus(iterator_->Status()) || bytesRead == 0) { break; } @@ -1174,11 +1174,11 @@ struct ClearWorker final : public PriorityWorker { batch.Clear(); } - baseIterator_->End(); + iterator_->End(); } private: - BaseIterator* baseIterator_; + BaseIterator* iterator_; leveldb::WriteOptions* writeOptions_; }; @@ -1536,7 +1536,7 @@ struct NextWorker final : public BaseWorker { // Limit the size of the cache to prevent starving the event loop // in JS-land while we're recursively calling process.nextTick(). - ok_ = iterator_->ReadMany(1000, result_); + ok_ = iterator_->ReadMany(1000); if (!ok_) { SetStatus(iterator_->Status()); @@ -1544,14 +1544,13 @@ struct NextWorker final : public BaseWorker { } void HandleOKCallback (napi_env env, napi_value callback) override { - size_t arraySize = result_.size() * 2; + size_t arraySize = iterator_->cache_.size(); napi_value jsArray; napi_create_array_with_length(env, arraySize, &jsArray); - for (size_t idx = 0; idx < result_.size(); ++idx) { - std::pair row = result_[idx]; - std::string key = row.first; - std::string value = row.second; + for (size_t idx = 0; idx < iterator_->cache_.size(); idx += 2) { + std::string key = iterator_->cache_[idx]; + std::string value = iterator_->cache_[idx + 1]; napi_value returnKey; if (iterator_->keyAsBuffer_) { @@ -1568,8 +1567,8 @@ struct NextWorker final : public BaseWorker { } // put the key & value in a descending order, so that they can be .pop:ed in javascript-land - napi_set_element(env, jsArray, static_cast(arraySize - idx * 2 - 1), returnKey); - napi_set_element(env, jsArray, static_cast(arraySize - idx * 2 - 2), returnValue); + napi_set_element(env, jsArray, static_cast(arraySize - idx - 1), returnKey); + napi_set_element(env, jsArray, static_cast(arraySize - idx - 2), returnValue); } napi_value argv[3]; @@ -1592,7 +1591,6 @@ struct NextWorker final : public BaseWorker { } Iterator* iterator_; - std::vector > result_; bool ok_; };