From 86143fc626827495ba2d0eb2c7af3d7c4c51535a Mon Sep 17 00:00:00 2001 From: Rok Date: Tue, 5 Jul 2022 20:07:11 +0200 Subject: [PATCH 01/13] Initial commit --- cpp/src/arrow/memory_pool.h | 6 ++++++ cpp/src/arrow/memory_pool_jemalloc.cc | 20 ++++++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/cpp/src/arrow/memory_pool.h b/cpp/src/arrow/memory_pool.h index 58b375af3a9..3e8b4ae5ff8 100644 --- a/cpp/src/arrow/memory_pool.h +++ b/cpp/src/arrow/memory_pool.h @@ -180,6 +180,12 @@ Status jemalloc_set_decay_ms(int ms); /// May return NotImplemented if mimalloc is not available. ARROW_EXPORT Status mimalloc_memory_pool(MemoryPool** out); +ARROW_EXPORT Status jemalloc_mallctl(const char* name, void* oldp, size_t* oldlenp, + void* newp, size_t newlen); + +ARROW_EXPORT Status jemalloc_stats_print(void (*write_cb)(void*, const char*), + void* cbopaque, const char* opts); + /// \brief Return the names of the backends supported by this Arrow build. ARROW_EXPORT std::vector SupportedMemoryBackendNames(); diff --git a/cpp/src/arrow/memory_pool_jemalloc.cc b/cpp/src/arrow/memory_pool_jemalloc.cc index 48a5bac137b..eed1b770b68 100644 --- a/cpp/src/arrow/memory_pool_jemalloc.cc +++ b/cpp/src/arrow/memory_pool_jemalloc.cc @@ -151,6 +151,26 @@ Status jemalloc_set_decay_ms(int ms) { return Status::OK(); } +Status jemalloc_mallctl(const char* name, void* oldp, size_t* oldlenp, void* newp, + size_t newlen) { +#ifdef ARROW_JEMALLOC + int res = mallctl(name, oldp, oldlenp, newp, newlen); + return res ? Status::UnknownError(res) : Status::OK(); +#else + return Status::NotImplemented("This Arrow build does not enable jemalloc"); +#endif +} + +Status jemalloc_stats_print(void (*write_cb)(void*, const char*), void* cbopaque, + const char* opts) { +#ifdef ARROW_JEMALLOC + malloc_stats_print(write_cb, cbopaque, opts); + return Status::OK(); +#else + return Status::NotImplemented("This Arrow build does not enable jemalloc"); +#endif +} + #undef RETURN_IF_JEMALLOC_ERROR } // namespace arrow From 02bc5727debe6628b872f41d66c542b3ab6e0484 Mon Sep 17 00:00:00 2001 From: Rok Date: Mon, 5 Sep 2022 19:42:48 +0200 Subject: [PATCH 02/13] Review feedback --- cpp/src/arrow/memory_pool.cc | 14 +++++++++ cpp/src/arrow/memory_pool.h | 44 ++++++++++++++++++++++++--- cpp/src/arrow/memory_pool_jemalloc.cc | 27 ++++++++-------- cpp/src/arrow/memory_pool_test.cc | 29 ++++++++++++++++++ 4 files changed, 98 insertions(+), 16 deletions(-) diff --git a/cpp/src/arrow/memory_pool.cc b/cpp/src/arrow/memory_pool.cc index 99cb0682462..ffefc796aee 100644 --- a/cpp/src/arrow/memory_pool.cc +++ b/cpp/src/arrow/memory_pool.cc @@ -669,6 +669,20 @@ MemoryPool* default_memory_pool() { Status jemalloc_set_decay_ms(int ms) { return Status::Invalid("jemalloc support is not built"); } + +Status jemalloc_get_stat(const char* name, size_t* out) { + return Status::Invalid("jemalloc support is not built"); +}; + +Status jemalloc_mallctl(const char* name, void* oldp, size_t* oldlenp, void* newp, + size_t newlen) { + return Status::NotImplemented("This Arrow build does not enable jemalloc"); +} + +Status jemalloc_stats_print(void (*write_cb)(void*, const char*), void* cbopaque, + const char* opts) { + return Status::NotImplemented("This Arrow build does not enable jemalloc"); +} #endif /////////////////////////////////////////////////////////////////////// diff --git a/cpp/src/arrow/memory_pool.h b/cpp/src/arrow/memory_pool.h index 3e8b4ae5ff8..4f4c45f04f1 100644 --- a/cpp/src/arrow/memory_pool.h +++ b/cpp/src/arrow/memory_pool.h @@ -175,17 +175,53 @@ ARROW_EXPORT Status jemalloc_memory_pool(MemoryPool** out); ARROW_EXPORT Status jemalloc_set_decay_ms(int ms); -/// \brief Return a process-wide memory pool based on mimalloc. +/// \brief Get basic allocation statistics from jemalloc +Status jemalloc_get_stat(const char* name, size_t* out); + +/// \brief The mallctl() function provides a general interface for introspecting the +/// memory allocator, as well as setting modifiable parameters and triggering actions. +/// The period-separated name argument specifies a location in a tree-structured +/// namespace; see the MALLCTL NAMESPACE section in jemalloc project documentation for +/// more information on the tree contents. To read a value, pass a pointer via oldp to +/// adequate space to contain the value, and a pointer to its length via oldlenp; +/// otherwise pass NULL and NULL. Similarly, to write a value, pass a pointer to the +/// value via newp, and its length via newlen; otherwise pass NULL and 0. /// -/// May return NotImplemented if mimalloc is not available. -ARROW_EXPORT Status mimalloc_memory_pool(MemoryPool** out); - ARROW_EXPORT Status jemalloc_mallctl(const char* name, void* oldp, size_t* oldlenp, void* newp, size_t newlen); +/// \brief The malloc_stats_print() function writes summary statistics via the write_cb +/// callback function pointer and cbopaque data passed to write_cb, or malloc_message() +/// if write_cb is NULL. The statistics are presented in human-readable form unless “J” +/// is specified as a character within the opts string, in which case the statistics are +/// presented in JSON format. This function can be called repeatedly. General information +/// that never changes during execution can be omitted by specifying “g” as a character +/// within the opts string. Note that malloc_stats_print() uses the mallctl*() functions +/// internally, so inconsistent statistics can be reported if multiple threads use these +/// functions simultaneously. If --enable-stats is specified during configuration, “m”, +/// “d”, and “a” can be specified to omit merged arena, destroyed merged arena, and per +/// arena statistics, respectively; “b” and “l” can be specified to omit per size class +/// statistics for bins and large objects, respectively; “x” can be specified to omit all +/// mutex statistics; “e” can be used to omit extent statistics. Unrecognized characters +/// are silently ignored. Note that thread caching may prevent some statistics from being +/// completely up to date, since extra locking would be required to merge counters that +/// track thread cache operations. +/// +/// The malloc_usable_size() function returns the usable size of the allocation pointed to +/// by ptr. The return value may be larger than the size that was requested during +/// allocation. The malloc_usable_size() function is not a mechanism for in-place +/// realloc(); rather it is provided solely as a tool for introspection purposes. +/// Any discrepancy between the requested allocation size and the size reported by +/// malloc_usable_size() should not be depended on, since such behavior is entirely +/// implementation-dependent. ARROW_EXPORT Status jemalloc_stats_print(void (*write_cb)(void*, const char*), void* cbopaque, const char* opts); +/// \brief Return a process-wide memory pool based on mimalloc. +/// +/// May return NotImplemented if mimalloc is not available. +ARROW_EXPORT Status mimalloc_memory_pool(MemoryPool** out); + /// \brief Return the names of the backends supported by this Arrow build. ARROW_EXPORT std::vector SupportedMemoryBackendNames(); diff --git a/cpp/src/arrow/memory_pool_jemalloc.cc b/cpp/src/arrow/memory_pool_jemalloc.cc index eed1b770b68..ebad73043b4 100644 --- a/cpp/src/arrow/memory_pool_jemalloc.cc +++ b/cpp/src/arrow/memory_pool_jemalloc.cc @@ -16,6 +16,7 @@ // under the License. #include "arrow/memory_pool_internal.h" +#include "arrow/util/io_util.h" #include "arrow/util/logging.h" // IWYU pragma: keep // We can't put the jemalloc memory pool implementation into @@ -151,26 +152,28 @@ Status jemalloc_set_decay_ms(int ms) { return Status::OK(); } +#undef RETURN_IF_JEMALLOC_ERROR + +Status jemalloc_get_stat(const char* name, size_t* out) { + size_t sz = sizeof(size_t); + int err = mallctl(name, out, &sz, NULL, 0); + return err ? arrow::internal::IOErrorFromErrno(err, "Failed retrieving ", &name) + : Status::OK(); +}; + +#ifdef ARROW_JEMALLOC Status jemalloc_mallctl(const char* name, void* oldp, size_t* oldlenp, void* newp, size_t newlen) { -#ifdef ARROW_JEMALLOC - int res = mallctl(name, oldp, oldlenp, newp, newlen); - return res ? Status::UnknownError(res) : Status::OK(); -#else - return Status::NotImplemented("This Arrow build does not enable jemalloc"); -#endif + int err = mallctl(name, oldp, oldlenp, newp, newlen); + return err ? arrow::internal::IOErrorFromErrno(err, "Memory allocation error.") + : Status::OK(); } Status jemalloc_stats_print(void (*write_cb)(void*, const char*), void* cbopaque, const char* opts) { -#ifdef ARROW_JEMALLOC malloc_stats_print(write_cb, cbopaque, opts); return Status::OK(); -#else - return Status::NotImplemented("This Arrow build does not enable jemalloc"); -#endif } - -#undef RETURN_IF_JEMALLOC_ERROR +#endif } // namespace arrow diff --git a/cpp/src/arrow/memory_pool_test.cc b/cpp/src/arrow/memory_pool_test.cc index 591d86a23f5..9b4c9a78435 100644 --- a/cpp/src/arrow/memory_pool_test.cc +++ b/cpp/src/arrow/memory_pool_test.cc @@ -172,4 +172,33 @@ TEST(Jemalloc, SetDirtyPageDecayMillis) { #endif } +TEST(Jemalloc, GetAllocationStats) { + size_t allocated, active, metadata, resident, mapped = 0; +#ifdef ARROW_JEMALLOC + auto pool = MemoryPool::CreateDefault(); + uint8_t* data; + + ASSERT_OK(pool->Allocate(42, &data)); + ASSERT_OK(jemalloc_get_stat("stats.allocated", &allocated)); + ASSERT_OK(jemalloc_get_stat("stats.active", &active)); + ASSERT_OK(jemalloc_get_stat("stats.metadata", &metadata)); + ASSERT_OK(jemalloc_get_stat("stats.resident", &resident)); + ASSERT_OK(jemalloc_get_stat("stats.mapped", &mapped)); + + // TODO + ASSERT_EQ(71424, allocated); + ASSERT_EQ(131072, active); + ASSERT_EQ(2814368, metadata); + ASSERT_EQ(2899968, resident); + ASSERT_EQ(6422528, mapped); + +#else + ASSERT_RAISES(IOError, jemalloc_get_stat("stats.allocated", allocated)); + ASSERT_RAISES(IOError, jemalloc_get_stat("stats.active", active)); + ASSERT_RAISES(IOError, jemalloc_get_stat("stats.metadata", metadata)); + ASSERT_RAISES(IOError, jemalloc_get_stat("stats.resident", resident)); + ASSERT_RAISES(IOError, jemalloc_get_stat("stats.mapped", mapped)); +#endif +} + } // namespace arrow From 434366fe71a612d428c401223c0706c56d2773fd Mon Sep 17 00:00:00 2001 From: Rok Date: Thu, 8 Sep 2022 21:23:12 +0200 Subject: [PATCH 03/13] Refactoring and review feedback --- cpp/src/arrow/memory_pool.cc | 20 +++-- cpp/src/arrow/memory_pool.h | 58 +++++--------- cpp/src/arrow/memory_pool_jemalloc.cc | 49 +++++++++--- cpp/src/arrow/memory_pool_test.cc | 110 ++++++++++++++++++++------ 4 files changed, 155 insertions(+), 82 deletions(-) diff --git a/cpp/src/arrow/memory_pool.cc b/cpp/src/arrow/memory_pool.cc index ffefc796aee..6850539035d 100644 --- a/cpp/src/arrow/memory_pool.cc +++ b/cpp/src/arrow/memory_pool.cc @@ -670,18 +670,22 @@ Status jemalloc_set_decay_ms(int ms) { return Status::Invalid("jemalloc support is not built"); } -Status jemalloc_get_stat(const char* name, size_t* out) { +Status jemalloc_get_stat(const char* name, size_t& out) { return Status::Invalid("jemalloc support is not built"); -}; +} -Status jemalloc_mallctl(const char* name, void* oldp, size_t* oldlenp, void* newp, - size_t newlen) { - return Status::NotImplemented("This Arrow build does not enable jemalloc"); +Status jemalloc_get_stat(const char* name, uint64_t& out) { + return Status::Invalid("jemalloc support is not built"); } -Status jemalloc_stats_print(void (*write_cb)(void*, const char*), void* cbopaque, - const char* opts) { - return Status::NotImplemented("This Arrow build does not enable jemalloc"); +Status jemalloc_get_statp(const char* name, uint64_t& out) { + return Status::Invalid("jemalloc support is not built"); +} + +Status jemalloc_peak_reset() { return Status::Invalid("jemalloc support is not built"); } + +Status jemalloc_stats_print(const char* opts) { + return Status::Invalid("jemalloc support is not built"); } #endif diff --git a/cpp/src/arrow/memory_pool.h b/cpp/src/arrow/memory_pool.h index 4f4c45f04f1..272d40a15b6 100644 --- a/cpp/src/arrow/memory_pool.h +++ b/cpp/src/arrow/memory_pool.h @@ -22,6 +22,7 @@ #include #include +#include "arrow/result.h" #include "arrow/status.h" #include "arrow/type_fwd.h" #include "arrow/util/visibility.h" @@ -176,46 +177,23 @@ ARROW_EXPORT Status jemalloc_set_decay_ms(int ms); /// \brief Get basic allocation statistics from jemalloc -Status jemalloc_get_stat(const char* name, size_t* out); - -/// \brief The mallctl() function provides a general interface for introspecting the -/// memory allocator, as well as setting modifiable parameters and triggering actions. -/// The period-separated name argument specifies a location in a tree-structured -/// namespace; see the MALLCTL NAMESPACE section in jemalloc project documentation for -/// more information on the tree contents. To read a value, pass a pointer via oldp to -/// adequate space to contain the value, and a pointer to its length via oldlenp; -/// otherwise pass NULL and NULL. Similarly, to write a value, pass a pointer to the -/// value via newp, and its length via newlen; otherwise pass NULL and 0. -/// -ARROW_EXPORT Status jemalloc_mallctl(const char* name, void* oldp, size_t* oldlenp, - void* newp, size_t newlen); - -/// \brief The malloc_stats_print() function writes summary statistics via the write_cb -/// callback function pointer and cbopaque data passed to write_cb, or malloc_message() -/// if write_cb is NULL. The statistics are presented in human-readable form unless “J” -/// is specified as a character within the opts string, in which case the statistics are -/// presented in JSON format. This function can be called repeatedly. General information -/// that never changes during execution can be omitted by specifying “g” as a character -/// within the opts string. Note that malloc_stats_print() uses the mallctl*() functions -/// internally, so inconsistent statistics can be reported if multiple threads use these -/// functions simultaneously. If --enable-stats is specified during configuration, “m”, -/// “d”, and “a” can be specified to omit merged arena, destroyed merged arena, and per -/// arena statistics, respectively; “b” and “l” can be specified to omit per size class -/// statistics for bins and large objects, respectively; “x” can be specified to omit all -/// mutex statistics; “e” can be used to omit extent statistics. Unrecognized characters -/// are silently ignored. Note that thread caching may prevent some statistics from being -/// completely up to date, since extra locking would be required to merge counters that -/// track thread cache operations. -/// -/// The malloc_usable_size() function returns the usable size of the allocation pointed to -/// by ptr. The return value may be larger than the size that was requested during -/// allocation. The malloc_usable_size() function is not a mechanism for in-place -/// realloc(); rather it is provided solely as a tool for introspection purposes. -/// Any discrepancy between the requested allocation size and the size reported by -/// malloc_usable_size() should not be depended on, since such behavior is entirely -/// implementation-dependent. -ARROW_EXPORT Status jemalloc_stats_print(void (*write_cb)(void*, const char*), - void* cbopaque, const char* opts); +/// See the MALLCTL NAMESPACE section in jemalloc project documentation for +/// available stats. +Status jemalloc_get_stat(const char* name, size_t& out); +Status jemalloc_get_stat(const char* name, uint64_t& out); + +/// \brief Get basic allocation statistics from jemalloc by passing a pointer +/// This is useful for avoiding the overhead of repeated copy calls. +Status jemalloc_get_statp(const char* name, uint64_t& out); + +/// \brief Resets the counter for bytes allocated in the calling thread to zero. +/// This affects subsequent calls to thread.peak.read, but not the values returned by +/// thread.allocated or thread.deallocated. +Status jemalloc_peak_reset(); + +/// \brief Prints summary statistics in human-readable form. See malloc_stats_print +/// documentation in jemalloc project documentation for available opt flags. +ARROW_EXPORT Status jemalloc_stats_print(const char* opts = ""); /// \brief Return a process-wide memory pool based on mimalloc. /// diff --git a/cpp/src/arrow/memory_pool_jemalloc.cc b/cpp/src/arrow/memory_pool_jemalloc.cc index ebad73043b4..de4636dc311 100644 --- a/cpp/src/arrow/memory_pool_jemalloc.cc +++ b/cpp/src/arrow/memory_pool_jemalloc.cc @@ -154,24 +154,49 @@ Status jemalloc_set_decay_ms(int ms) { #undef RETURN_IF_JEMALLOC_ERROR -Status jemalloc_get_stat(const char* name, size_t* out) { - size_t sz = sizeof(size_t); - int err = mallctl(name, out, &sz, NULL, 0); +#ifdef ARROW_JEMALLOC +Status jemalloc_get_stat(const char* name, size_t& out) { + uint64_t epoch; + size_t sz = sizeof(uint64_t); + mallctl("epoch", &epoch, &sz, &epoch, sz); + + size_t value; + sz = sizeof(size_t); + int err = mallctl(name, &value, &sz, NULLPTR, 0); + out = std::move(value); + return err ? arrow::internal::IOErrorFromErrno(err, "Failed retrieving ", &name) : Status::OK(); -}; +} -#ifdef ARROW_JEMALLOC -Status jemalloc_mallctl(const char* name, void* oldp, size_t* oldlenp, void* newp, - size_t newlen) { - int err = mallctl(name, oldp, oldlenp, newp, newlen); - return err ? arrow::internal::IOErrorFromErrno(err, "Memory allocation error.") +Status jemalloc_get_stat(const char* name, uint64_t& out) { + uint64_t value; + size_t sz = sizeof(uint64_t); + int err = mallctl(name, &value, &sz, NULLPTR, 0); + out = value; + + return err ? arrow::internal::IOErrorFromErrno(err, "Failed retrieving ", &name) + : Status::OK(); +} + +Status jemalloc_get_statp(const char* name, uint64_t& out) { + uint64_t* value; + size_t sz = sizeof(uint64_t); + int err = mallctl(name, &value, &sz, NULLPTR, 0); + out = *value; + + return err ? arrow::internal::IOErrorFromErrno(err, "Failed retrieving ", &name) + : Status::OK(); +} + +Status jemalloc_peak_reset() { + int err = mallctl("thread.peak.reset", NULLPTR, NULLPTR, NULLPTR, 0); + return err ? arrow::internal::IOErrorFromErrno(err, "Failed resetting thread.peak.") : Status::OK(); } -Status jemalloc_stats_print(void (*write_cb)(void*, const char*), void* cbopaque, - const char* opts) { - malloc_stats_print(write_cb, cbopaque, opts); +Status jemalloc_stats_print(const char* opts) { + malloc_stats_print(NULLPTR, NULLPTR, opts); return Status::OK(); } #endif diff --git a/cpp/src/arrow/memory_pool_test.cc b/cpp/src/arrow/memory_pool_test.cc index 9b4c9a78435..21a75436599 100644 --- a/cpp/src/arrow/memory_pool_test.cc +++ b/cpp/src/arrow/memory_pool_test.cc @@ -25,6 +25,7 @@ #include "arrow/status.h" #include "arrow/testing/gtest_util.h" #include "arrow/util/config.h" +#include "arrow/util/logging.h" namespace arrow { @@ -173,31 +174,96 @@ TEST(Jemalloc, SetDirtyPageDecayMillis) { } TEST(Jemalloc, GetAllocationStats) { - size_t allocated, active, metadata, resident, mapped = 0; #ifdef ARROW_JEMALLOC - auto pool = MemoryPool::CreateDefault(); uint8_t* data; - - ASSERT_OK(pool->Allocate(42, &data)); - ASSERT_OK(jemalloc_get_stat("stats.allocated", &allocated)); - ASSERT_OK(jemalloc_get_stat("stats.active", &active)); - ASSERT_OK(jemalloc_get_stat("stats.metadata", &metadata)); - ASSERT_OK(jemalloc_get_stat("stats.resident", &resident)); - ASSERT_OK(jemalloc_get_stat("stats.mapped", &mapped)); - - // TODO - ASSERT_EQ(71424, allocated); - ASSERT_EQ(131072, active); - ASSERT_EQ(2814368, metadata); - ASSERT_EQ(2899968, resident); - ASSERT_EQ(6422528, mapped); - + size_t allocated, active, metadata, resident, mapped, retained, allocated0, active0, + metadata0, resident0, mapped0, retained0; + uint64_t thread_allocatedp, thread_deallocatedp, thread_allocatedp0, + thread_deallocatedp0, thread_allocated, thread_deallocated, thread_peak_read, + thread_allocated0, thread_deallocated0, thread_peak_read0; + auto pool = default_memory_pool(); + ABORT_NOT_OK(jemalloc_memory_pool(&pool)); + ASSERT_EQ("jemalloc", pool->backend_name()); + + // Record stats before allocating + ASSERT_OK(jemalloc_get_stat("stats.allocated", allocated0)); + ASSERT_OK(jemalloc_get_stat("stats.active", active0)); + ASSERT_OK(jemalloc_get_stat("stats.metadata", metadata0)); + ASSERT_OK(jemalloc_get_stat("stats.resident", resident0)); + ASSERT_OK(jemalloc_get_stat("stats.mapped", mapped0)); + ASSERT_OK(jemalloc_get_stat("stats.retained", retained0)); + ASSERT_OK(jemalloc_get_stat("thread.allocated", thread_allocated0)); + ASSERT_OK(jemalloc_get_stat("thread.deallocated", thread_deallocated0)); + ASSERT_OK(jemalloc_get_stat("thread.peak.read", thread_peak_read0)); + ASSERT_OK(jemalloc_get_statp("thread.allocatedp", thread_allocatedp0)); + ASSERT_OK(jemalloc_get_statp("thread.deallocatedp", thread_deallocatedp0)); + + // Allocate memory + ASSERT_OK(jemalloc_set_decay_ms(10000)); + ASSERT_OK(pool->Allocate(1256, &data)); + ASSERT_EQ(1256, pool->bytes_allocated()); + ASSERT_OK(pool->Reallocate(1256, 1214, &data)); + ASSERT_EQ(1214, pool->bytes_allocated()); + + // Record stats after allocating + ASSERT_OK(jemalloc_get_stat("stats.allocated", allocated)); + ASSERT_OK(jemalloc_get_stat("stats.active", active)); + ASSERT_OK(jemalloc_get_stat("stats.metadata", metadata)); + ASSERT_OK(jemalloc_get_stat("stats.resident", resident)); + ASSERT_OK(jemalloc_get_stat("stats.mapped", mapped)); + ASSERT_OK(jemalloc_get_stat("stats.retained", retained)); + ASSERT_OK(jemalloc_get_stat("thread.allocated", thread_allocated)); + ASSERT_OK(jemalloc_get_stat("thread.deallocated", thread_deallocated)); + ASSERT_OK(jemalloc_get_stat("thread.peak.read", thread_peak_read)); + ASSERT_OK(jemalloc_get_statp("thread.allocatedp", thread_allocatedp)); + ASSERT_OK(jemalloc_get_statp("thread.deallocatedp", thread_deallocatedp)); + + // Reading stats via value return is equivalent to pointer passing + ASSERT_EQ(thread_allocated, thread_allocatedp); + ASSERT_EQ(thread_deallocated, thread_deallocatedp); + ASSERT_EQ(thread_allocated0, thread_allocatedp0); + ASSERT_EQ(thread_deallocated0, thread_deallocatedp0); + + // Check allocated stats pre-allocation + ASSERT_EQ(71424, allocated0); + ASSERT_EQ(131072, active0); + ASSERT_EQ(2814368, metadata0); + ASSERT_EQ(2899968, resident0); + ASSERT_EQ(6422528, mapped0); + ASSERT_EQ(0, retained0); + + // Check allocated stats change due to allocation + ASSERT_EQ(81920, allocated - allocated0); + ASSERT_EQ(81920, active - active0); + ASSERT_EQ(384, metadata - metadata0); + ASSERT_EQ(98304, resident - resident0); + ASSERT_EQ(81920, mapped - mapped0); + ASSERT_EQ(0, retained - retained0); + + ASSERT_EQ(1280, thread_peak_read - thread_peak_read0); + ASSERT_EQ(2560, thread_allocated - thread_allocated0); + ASSERT_EQ(1280, thread_deallocated - thread_deallocated0); + + // Resetting thread peak read metric + ASSERT_OK(pool->Allocate(12560, &data)); + ASSERT_OK(jemalloc_get_stat("thread.peak.read", thread_peak_read)); + ASSERT_EQ(15616, thread_peak_read); + ASSERT_OK(jemalloc_peak_reset()); + ASSERT_OK(pool->Allocate(1256, &data)); + ASSERT_OK(jemalloc_get_stat("thread.peak.read", thread_peak_read)); + ASSERT_EQ(1280, thread_peak_read); + + // Print statistics to stdout + ASSERT_OK(jemalloc_stats_print("ax")); #else - ASSERT_RAISES(IOError, jemalloc_get_stat("stats.allocated", allocated)); - ASSERT_RAISES(IOError, jemalloc_get_stat("stats.active", active)); - ASSERT_RAISES(IOError, jemalloc_get_stat("stats.metadata", metadata)); - ASSERT_RAISES(IOError, jemalloc_get_stat("stats.resident", resident)); - ASSERT_RAISES(IOError, jemalloc_get_stat("stats.mapped", mapped)); + size_t allocated; + uint64_t thread_peak_read, stats_allocated, stats_allocatedp; + ASSERT_RAISES(Invalid, jemalloc_get_stat("thread.peak.read", &thread_peak_read)); + ASSERT_RAISES(Invalid, jemalloc_get_stat("stats.allocated", &allocated)); + ASSERT_RAISES(Invalid, jemalloc_get_stat("stats.allocated", &stats_allocated)); + ASSERT_RAISES(Invalid, jemalloc_get_statp("stats.allocatedp", &stats_allocatedp)); + ASSERT_RAISES(Invalid, jemalloc_peak_reset()); + ASSERT_RAISES(Invalid, jemalloc_stats_print("ax")); #endif } From 5104759be6a6ef44ff054776e86f50b872614a2a Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Fri, 9 Sep 2022 16:10:11 +0200 Subject: [PATCH 04/13] Apply suggestions from code review Co-authored-by: Antoine Pitrou --- cpp/src/arrow/memory_pool.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/cpp/src/arrow/memory_pool.h b/cpp/src/arrow/memory_pool.h index 272d40a15b6..c27ace1e7ed 100644 --- a/cpp/src/arrow/memory_pool.h +++ b/cpp/src/arrow/memory_pool.h @@ -186,12 +186,14 @@ Status jemalloc_get_stat(const char* name, uint64_t& out); /// This is useful for avoiding the overhead of repeated copy calls. Status jemalloc_get_statp(const char* name, uint64_t& out); -/// \brief Resets the counter for bytes allocated in the calling thread to zero. +/// \brief Reset the counter for peak bytes allocated in the calling thread to zero. /// This affects subsequent calls to thread.peak.read, but not the values returned by /// thread.allocated or thread.deallocated. Status jemalloc_peak_reset(); -/// \brief Prints summary statistics in human-readable form. See malloc_stats_print +/// \brief Print summary statistics in human-readable form. +/// +/// See malloc_stats_print /// documentation in jemalloc project documentation for available opt flags. ARROW_EXPORT Status jemalloc_stats_print(const char* opts = ""); From 9b4d36a8f78dc74b004c155e2efbb367d51fc676 Mon Sep 17 00:00:00 2001 From: Rok Date: Fri, 9 Sep 2022 20:04:51 +0200 Subject: [PATCH 05/13] Refactoring and review feedback --- cpp/src/arrow/memory_pool.cc | 10 +---- cpp/src/arrow/memory_pool.h | 13 ++---- cpp/src/arrow/memory_pool_jemalloc.cc | 50 ++++++++++------------ cpp/src/arrow/memory_pool_test.cc | 60 +++++++++++++-------------- 4 files changed, 57 insertions(+), 76 deletions(-) diff --git a/cpp/src/arrow/memory_pool.cc b/cpp/src/arrow/memory_pool.cc index 6850539035d..69dad58e2ff 100644 --- a/cpp/src/arrow/memory_pool.cc +++ b/cpp/src/arrow/memory_pool.cc @@ -670,15 +670,7 @@ Status jemalloc_set_decay_ms(int ms) { return Status::Invalid("jemalloc support is not built"); } -Status jemalloc_get_stat(const char* name, size_t& out) { - return Status::Invalid("jemalloc support is not built"); -} - -Status jemalloc_get_stat(const char* name, uint64_t& out) { - return Status::Invalid("jemalloc support is not built"); -} - -Status jemalloc_get_statp(const char* name, uint64_t& out) { +Result jemalloc_get_stat(const char* name) { return Status::Invalid("jemalloc support is not built"); } diff --git a/cpp/src/arrow/memory_pool.h b/cpp/src/arrow/memory_pool.h index c27ace1e7ed..6d97190586b 100644 --- a/cpp/src/arrow/memory_pool.h +++ b/cpp/src/arrow/memory_pool.h @@ -176,15 +176,10 @@ ARROW_EXPORT Status jemalloc_memory_pool(MemoryPool** out); ARROW_EXPORT Status jemalloc_set_decay_ms(int ms); -/// \brief Get basic allocation statistics from jemalloc +/// \brief Get basic statistics from jemalloc's mallctl. /// See the MALLCTL NAMESPACE section in jemalloc project documentation for /// available stats. -Status jemalloc_get_stat(const char* name, size_t& out); -Status jemalloc_get_stat(const char* name, uint64_t& out); - -/// \brief Get basic allocation statistics from jemalloc by passing a pointer -/// This is useful for avoiding the overhead of repeated copy calls. -Status jemalloc_get_statp(const char* name, uint64_t& out); +Result jemalloc_get_stat(const char* name); /// \brief Reset the counter for peak bytes allocated in the calling thread to zero. /// This affects subsequent calls to thread.peak.read, but not the values returned by @@ -193,8 +188,8 @@ Status jemalloc_peak_reset(); /// \brief Print summary statistics in human-readable form. /// -/// See malloc_stats_print -/// documentation in jemalloc project documentation for available opt flags. +/// See malloc_stats_print documentation in jemalloc project documentation for +/// available opt flags. ARROW_EXPORT Status jemalloc_stats_print(const char* opts = ""); /// \brief Return a process-wide memory pool based on mimalloc. diff --git a/cpp/src/arrow/memory_pool_jemalloc.cc b/cpp/src/arrow/memory_pool_jemalloc.cc index de4636dc311..7fa190f0fb0 100644 --- a/cpp/src/arrow/memory_pool_jemalloc.cc +++ b/cpp/src/arrow/memory_pool_jemalloc.cc @@ -155,38 +155,34 @@ Status jemalloc_set_decay_ms(int ms) { #undef RETURN_IF_JEMALLOC_ERROR #ifdef ARROW_JEMALLOC -Status jemalloc_get_stat(const char* name, size_t& out) { - uint64_t epoch; +Result jemalloc_get_stat(const char* name) { size_t sz = sizeof(uint64_t); - mallctl("epoch", &epoch, &sz, &epoch, sz); - - size_t value; - sz = sizeof(size_t); - int err = mallctl(name, &value, &sz, NULLPTR, 0); - out = std::move(value); - - return err ? arrow::internal::IOErrorFromErrno(err, "Failed retrieving ", &name) - : Status::OK(); -} - -Status jemalloc_get_stat(const char* name, uint64_t& out) { + int err; uint64_t value; - size_t sz = sizeof(uint64_t); - int err = mallctl(name, &value, &sz, NULLPTR, 0); - out = value; - return err ? arrow::internal::IOErrorFromErrno(err, "Failed retrieving ", &name) - : Status::OK(); -} + if (std::strcmp(name, "thread.allocatedp") == 0 || + std::strcmp(name, "thread.deallocatedp") == 0) { + uint64_t* tmp_value; + err = mallctl(name, &tmp_value, &sz, NULLPTR, 0); + value = *tmp_value; + } else if (std::strcmp(name, "stats.allocated") == 0 || + std::strcmp(name, "stats.active") == 0 || + std::strcmp(name, "stats.metadata") == 0 || + std::strcmp(name, "stats.resident") == 0 || + std::strcmp(name, "stats.mapped") == 0 || + std::strcmp(name, "stats.retained") == 0) { + uint64_t epoch; + mallctl("epoch", &epoch, &sz, &epoch, sz); + err = mallctl(name, &value, &sz, NULLPTR, 0); + } else { + err = mallctl(name, &value, &sz, NULLPTR, 0); + } -Status jemalloc_get_statp(const char* name, uint64_t& out) { - uint64_t* value; - size_t sz = sizeof(uint64_t); - int err = mallctl(name, &value, &sz, NULLPTR, 0); - out = *value; + if (err) { + return arrow::internal::IOErrorFromErrno(err, "Failed retrieving ", &name); + } - return err ? arrow::internal::IOErrorFromErrno(err, "Failed retrieving ", &name) - : Status::OK(); + return value; } Status jemalloc_peak_reset() { diff --git a/cpp/src/arrow/memory_pool_test.cc b/cpp/src/arrow/memory_pool_test.cc index 21a75436599..9811ec88849 100644 --- a/cpp/src/arrow/memory_pool_test.cc +++ b/cpp/src/arrow/memory_pool_test.cc @@ -176,7 +176,7 @@ TEST(Jemalloc, SetDirtyPageDecayMillis) { TEST(Jemalloc, GetAllocationStats) { #ifdef ARROW_JEMALLOC uint8_t* data; - size_t allocated, active, metadata, resident, mapped, retained, allocated0, active0, + uint64_t allocated, active, metadata, resident, mapped, retained, allocated0, active0, metadata0, resident0, mapped0, retained0; uint64_t thread_allocatedp, thread_deallocatedp, thread_allocatedp0, thread_deallocatedp0, thread_allocated, thread_deallocated, thread_peak_read, @@ -186,17 +186,17 @@ TEST(Jemalloc, GetAllocationStats) { ASSERT_EQ("jemalloc", pool->backend_name()); // Record stats before allocating - ASSERT_OK(jemalloc_get_stat("stats.allocated", allocated0)); - ASSERT_OK(jemalloc_get_stat("stats.active", active0)); - ASSERT_OK(jemalloc_get_stat("stats.metadata", metadata0)); - ASSERT_OK(jemalloc_get_stat("stats.resident", resident0)); - ASSERT_OK(jemalloc_get_stat("stats.mapped", mapped0)); - ASSERT_OK(jemalloc_get_stat("stats.retained", retained0)); - ASSERT_OK(jemalloc_get_stat("thread.allocated", thread_allocated0)); - ASSERT_OK(jemalloc_get_stat("thread.deallocated", thread_deallocated0)); - ASSERT_OK(jemalloc_get_stat("thread.peak.read", thread_peak_read0)); - ASSERT_OK(jemalloc_get_statp("thread.allocatedp", thread_allocatedp0)); - ASSERT_OK(jemalloc_get_statp("thread.deallocatedp", thread_deallocatedp0)); + ASSERT_OK_AND_ASSIGN(allocated0, jemalloc_get_stat("stats.allocated")); + ASSERT_OK_AND_ASSIGN(active0, jemalloc_get_stat("stats.active")); + ASSERT_OK_AND_ASSIGN(metadata0, jemalloc_get_stat("stats.metadata")); + ASSERT_OK_AND_ASSIGN(resident0, jemalloc_get_stat("stats.resident")); + ASSERT_OK_AND_ASSIGN(mapped0, jemalloc_get_stat("stats.mapped")); + ASSERT_OK_AND_ASSIGN(retained0, jemalloc_get_stat("stats.retained")); + ASSERT_OK_AND_ASSIGN(thread_allocated0, jemalloc_get_stat("thread.allocated")); + ASSERT_OK_AND_ASSIGN(thread_deallocated0, jemalloc_get_stat("thread.deallocated")); + ASSERT_OK_AND_ASSIGN(thread_peak_read0, jemalloc_get_stat("thread.peak.read")); + ASSERT_OK_AND_ASSIGN(thread_allocatedp0, jemalloc_get_stat("thread.allocatedp")); + ASSERT_OK_AND_ASSIGN(thread_deallocatedp0, jemalloc_get_stat("thread.deallocatedp")); // Allocate memory ASSERT_OK(jemalloc_set_decay_ms(10000)); @@ -206,17 +206,17 @@ TEST(Jemalloc, GetAllocationStats) { ASSERT_EQ(1214, pool->bytes_allocated()); // Record stats after allocating - ASSERT_OK(jemalloc_get_stat("stats.allocated", allocated)); - ASSERT_OK(jemalloc_get_stat("stats.active", active)); - ASSERT_OK(jemalloc_get_stat("stats.metadata", metadata)); - ASSERT_OK(jemalloc_get_stat("stats.resident", resident)); - ASSERT_OK(jemalloc_get_stat("stats.mapped", mapped)); - ASSERT_OK(jemalloc_get_stat("stats.retained", retained)); - ASSERT_OK(jemalloc_get_stat("thread.allocated", thread_allocated)); - ASSERT_OK(jemalloc_get_stat("thread.deallocated", thread_deallocated)); - ASSERT_OK(jemalloc_get_stat("thread.peak.read", thread_peak_read)); - ASSERT_OK(jemalloc_get_statp("thread.allocatedp", thread_allocatedp)); - ASSERT_OK(jemalloc_get_statp("thread.deallocatedp", thread_deallocatedp)); + ASSERT_OK_AND_ASSIGN(allocated, jemalloc_get_stat("stats.allocated")); + ASSERT_OK_AND_ASSIGN(active, jemalloc_get_stat("stats.active")); + ASSERT_OK_AND_ASSIGN(metadata, jemalloc_get_stat("stats.metadata")); + ASSERT_OK_AND_ASSIGN(resident, jemalloc_get_stat("stats.resident")); + ASSERT_OK_AND_ASSIGN(mapped, jemalloc_get_stat("stats.mapped")); + ASSERT_OK_AND_ASSIGN(retained, jemalloc_get_stat("stats.retained")); + ASSERT_OK_AND_ASSIGN(thread_allocated, jemalloc_get_stat("thread.allocated")); + ASSERT_OK_AND_ASSIGN(thread_deallocated, jemalloc_get_stat("thread.deallocated")); + ASSERT_OK_AND_ASSIGN(thread_peak_read, jemalloc_get_stat("thread.peak.read")); + ASSERT_OK_AND_ASSIGN(thread_allocatedp, jemalloc_get_stat("thread.allocatedp")); + ASSERT_OK_AND_ASSIGN(thread_deallocatedp, jemalloc_get_stat("thread.deallocatedp")); // Reading stats via value return is equivalent to pointer passing ASSERT_EQ(thread_allocated, thread_allocatedp); @@ -246,22 +246,20 @@ TEST(Jemalloc, GetAllocationStats) { // Resetting thread peak read metric ASSERT_OK(pool->Allocate(12560, &data)); - ASSERT_OK(jemalloc_get_stat("thread.peak.read", thread_peak_read)); + ASSERT_OK_AND_ASSIGN(thread_peak_read, jemalloc_get_stat("thread.peak.read")); ASSERT_EQ(15616, thread_peak_read); ASSERT_OK(jemalloc_peak_reset()); ASSERT_OK(pool->Allocate(1256, &data)); - ASSERT_OK(jemalloc_get_stat("thread.peak.read", thread_peak_read)); + ASSERT_OK_AND_ASSIGN(thread_peak_read, jemalloc_get_stat("thread.peak.read")); ASSERT_EQ(1280, thread_peak_read); // Print statistics to stdout ASSERT_OK(jemalloc_stats_print("ax")); #else - size_t allocated; - uint64_t thread_peak_read, stats_allocated, stats_allocatedp; - ASSERT_RAISES(Invalid, jemalloc_get_stat("thread.peak.read", &thread_peak_read)); - ASSERT_RAISES(Invalid, jemalloc_get_stat("stats.allocated", &allocated)); - ASSERT_RAISES(Invalid, jemalloc_get_stat("stats.allocated", &stats_allocated)); - ASSERT_RAISES(Invalid, jemalloc_get_statp("stats.allocatedp", &stats_allocatedp)); + ASSERT_RAISES(Invalid, jemalloc_get_stat("thread.peak.read")); + ASSERT_RAISES(Invalid, jemalloc_get_stat("stats.allocated")); + ASSERT_RAISES(Invalid, jemalloc_get_stat("stats.allocated")); + ASSERT_RAISES(Invalid, jemalloc_get_stat("stats.allocatedp")); ASSERT_RAISES(Invalid, jemalloc_peak_reset()); ASSERT_RAISES(Invalid, jemalloc_stats_print("ax")); #endif From 668feefdf7ce38014ee3ddbb05d70c072c6d5c56 Mon Sep 17 00:00:00 2001 From: Rok Date: Mon, 12 Sep 2022 17:57:55 +0200 Subject: [PATCH 06/13] jemalloc_stats_print can output std::string. --- cpp/src/arrow/memory_pool.cc | 8 +++- cpp/src/arrow/memory_pool.h | 14 ++++-- cpp/src/arrow/memory_pool_jemalloc.cc | 58 ++++++++++++++++++------- cpp/src/arrow/memory_pool_test.cc | 62 ++++++++++++--------------- 4 files changed, 88 insertions(+), 54 deletions(-) diff --git a/cpp/src/arrow/memory_pool.cc b/cpp/src/arrow/memory_pool.cc index 69dad58e2ff..70819b9139c 100644 --- a/cpp/src/arrow/memory_pool.cc +++ b/cpp/src/arrow/memory_pool.cc @@ -676,9 +676,15 @@ Result jemalloc_get_stat(const char* name) { Status jemalloc_peak_reset() { return Status::Invalid("jemalloc support is not built"); } -Status jemalloc_stats_print(const char* opts) { +Status jemalloc_stats_print(void (*write_cb)(void*, const char*), void* cbopaque, + const char* opts) { return Status::Invalid("jemalloc support is not built"); } + +Result jemalloc_stats_print(const char* opts) { + return Status::Invalid("jemalloc support is not built"); +} + #endif /////////////////////////////////////////////////////////////////////// diff --git a/cpp/src/arrow/memory_pool.h b/cpp/src/arrow/memory_pool.h index 6d97190586b..f5e25a202f6 100644 --- a/cpp/src/arrow/memory_pool.h +++ b/cpp/src/arrow/memory_pool.h @@ -179,18 +179,26 @@ Status jemalloc_set_decay_ms(int ms); /// \brief Get basic statistics from jemalloc's mallctl. /// See the MALLCTL NAMESPACE section in jemalloc project documentation for /// available stats. +ARROW_EXPORT Result jemalloc_get_stat(const char* name); /// \brief Reset the counter for peak bytes allocated in the calling thread to zero. /// This affects subsequent calls to thread.peak.read, but not the values returned by /// thread.allocated or thread.deallocated. +ARROW_EXPORT Status jemalloc_peak_reset(); -/// \brief Print summary statistics in human-readable form. -/// +/// \brief Print summary statistics in human-readable form to stderr. +/// See malloc_stats_print documentation in jemalloc project documentation for +/// available opt flags. +ARROW_EXPORT +Status jemalloc_stats_print(void (*write_cb)(void* opaque, const char* buf), + void* cbopaque, const char* opts = ""); + +/// \brief Get summary statistics in human-readable form. /// See malloc_stats_print documentation in jemalloc project documentation for /// available opt flags. -ARROW_EXPORT Status jemalloc_stats_print(const char* opts = ""); +Result jemalloc_stats_print(const char* opts = ""); /// \brief Return a process-wide memory pool based on mimalloc. /// diff --git a/cpp/src/arrow/memory_pool_jemalloc.cc b/cpp/src/arrow/memory_pool_jemalloc.cc index 7fa190f0fb0..f255062182d 100644 --- a/cpp/src/arrow/memory_pool_jemalloc.cc +++ b/cpp/src/arrow/memory_pool_jemalloc.cc @@ -160,24 +160,18 @@ Result jemalloc_get_stat(const char* name) { int err; uint64_t value; - if (std::strcmp(name, "thread.allocatedp") == 0 || - std::strcmp(name, "thread.deallocatedp") == 0) { - uint64_t* tmp_value; - err = mallctl(name, &tmp_value, &sz, NULLPTR, 0); - value = *tmp_value; - } else if (std::strcmp(name, "stats.allocated") == 0 || - std::strcmp(name, "stats.active") == 0 || - std::strcmp(name, "stats.metadata") == 0 || - std::strcmp(name, "stats.resident") == 0 || - std::strcmp(name, "stats.mapped") == 0 || - std::strcmp(name, "stats.retained") == 0) { + if (std::strcmp(name, "stats.allocated") == 0 || + std::strcmp(name, "stats.active") == 0 || + std::strcmp(name, "stats.metadata") == 0 || + std::strcmp(name, "stats.resident") == 0 || + std::strcmp(name, "stats.mapped") == 0 || + std::strcmp(name, "stats.retained") == 0) { uint64_t epoch; mallctl("epoch", &epoch, &sz, &epoch, sz); - err = mallctl(name, &value, &sz, NULLPTR, 0); - } else { - err = mallctl(name, &value, &sz, NULLPTR, 0); } + err = mallctl(name, &value, &sz, NULLPTR, 0); + if (err) { return arrow::internal::IOErrorFromErrno(err, "Failed retrieving ", &name); } @@ -191,8 +185,40 @@ Status jemalloc_peak_reset() { : Status::OK(); } -Status jemalloc_stats_print(const char* opts) { - malloc_stats_print(NULLPTR, NULLPTR, opts); +typedef struct { + char* buf; + size_t len; +} parser_t; + +void write_cb(void* opaque, const char* str) { + parser_t* parser = reinterpret_cast(opaque); + size_t len = strlen(str); + char* buf = (parser->buf == NULL) + ? reinterpret_cast(mallocx(len + 1, MALLOCX_TCACHE_NONE)) + : reinterpret_cast( + rallocx(parser->buf, parser->len + len + 1, MALLOCX_TCACHE_NONE)); + if (buf == NULL) { + ARROW_LOG(ERROR) << "Unexpected input appending failure"; + } + memcpy(&buf[parser->len], str, len + 1); + parser->buf = buf; + parser->len += len; +} + +Result jemalloc_stats_print(const char* opts) { + parser_t parser = parser_t{NULL, 0}; + malloc_stats_print(write_cb, static_cast(&parser), opts); + std::string stats = parser.buf; + if (parser.buf != NULL) { + dallocx(parser.buf, MALLOCX_TCACHE_NONE); + } + return stats; +} + +Status jemalloc_stats_print(void (*write_cb)(void*, const char*), void* cbopaque, + const char* opts) { + malloc_stats_print(reinterpret_cast(write_cb), cbopaque, + opts); return Status::OK(); } #endif diff --git a/cpp/src/arrow/memory_pool_test.cc b/cpp/src/arrow/memory_pool_test.cc index 9811ec88849..d9c866b89a9 100644 --- a/cpp/src/arrow/memory_pool_test.cc +++ b/cpp/src/arrow/memory_pool_test.cc @@ -178,9 +178,8 @@ TEST(Jemalloc, GetAllocationStats) { uint8_t* data; uint64_t allocated, active, metadata, resident, mapped, retained, allocated0, active0, metadata0, resident0, mapped0, retained0; - uint64_t thread_allocatedp, thread_deallocatedp, thread_allocatedp0, - thread_deallocatedp0, thread_allocated, thread_deallocated, thread_peak_read, - thread_allocated0, thread_deallocated0, thread_peak_read0; + uint64_t thread_allocated, thread_deallocated, thread_peak_read, thread_allocated0, + thread_deallocated0, thread_peak_read0; auto pool = default_memory_pool(); ABORT_NOT_OK(jemalloc_memory_pool(&pool)); ASSERT_EQ("jemalloc", pool->backend_name()); @@ -195,15 +194,13 @@ TEST(Jemalloc, GetAllocationStats) { ASSERT_OK_AND_ASSIGN(thread_allocated0, jemalloc_get_stat("thread.allocated")); ASSERT_OK_AND_ASSIGN(thread_deallocated0, jemalloc_get_stat("thread.deallocated")); ASSERT_OK_AND_ASSIGN(thread_peak_read0, jemalloc_get_stat("thread.peak.read")); - ASSERT_OK_AND_ASSIGN(thread_allocatedp0, jemalloc_get_stat("thread.allocatedp")); - ASSERT_OK_AND_ASSIGN(thread_deallocatedp0, jemalloc_get_stat("thread.deallocatedp")); // Allocate memory ASSERT_OK(jemalloc_set_decay_ms(10000)); ASSERT_OK(pool->Allocate(1256, &data)); - ASSERT_EQ(1256, pool->bytes_allocated()); + ASSERT_EQ(pool->bytes_allocated(), 1256); ASSERT_OK(pool->Reallocate(1256, 1214, &data)); - ASSERT_EQ(1214, pool->bytes_allocated()); + ASSERT_EQ(pool->bytes_allocated(), 1214); // Record stats after allocating ASSERT_OK_AND_ASSIGN(allocated, jemalloc_get_stat("stats.allocated")); @@ -215,46 +212,43 @@ TEST(Jemalloc, GetAllocationStats) { ASSERT_OK_AND_ASSIGN(thread_allocated, jemalloc_get_stat("thread.allocated")); ASSERT_OK_AND_ASSIGN(thread_deallocated, jemalloc_get_stat("thread.deallocated")); ASSERT_OK_AND_ASSIGN(thread_peak_read, jemalloc_get_stat("thread.peak.read")); - ASSERT_OK_AND_ASSIGN(thread_allocatedp, jemalloc_get_stat("thread.allocatedp")); - ASSERT_OK_AND_ASSIGN(thread_deallocatedp, jemalloc_get_stat("thread.deallocatedp")); - - // Reading stats via value return is equivalent to pointer passing - ASSERT_EQ(thread_allocated, thread_allocatedp); - ASSERT_EQ(thread_deallocated, thread_deallocatedp); - ASSERT_EQ(thread_allocated0, thread_allocatedp0); - ASSERT_EQ(thread_deallocated0, thread_deallocatedp0); // Check allocated stats pre-allocation - ASSERT_EQ(71424, allocated0); - ASSERT_EQ(131072, active0); - ASSERT_EQ(2814368, metadata0); - ASSERT_EQ(2899968, resident0); - ASSERT_EQ(6422528, mapped0); - ASSERT_EQ(0, retained0); + ASSERT_NEAR(allocated0, 122624, 100000); + ASSERT_NEAR(active0, 131072, 10000); + ASSERT_NEAR(metadata0, 3000000, 1000000); + ASSERT_NEAR(resident0, 3000000, 1000000); + ASSERT_NEAR(mapped0, 6500000, 1000000); + ASSERT_NEAR(retained0, 0, 100); // Check allocated stats change due to allocation - ASSERT_EQ(81920, allocated - allocated0); - ASSERT_EQ(81920, active - active0); - ASSERT_EQ(384, metadata - metadata0); - ASSERT_EQ(98304, resident - resident0); - ASSERT_EQ(81920, mapped - mapped0); - ASSERT_EQ(0, retained - retained0); + ASSERT_NEAR(allocated - allocated0, 81920, 1000); + ASSERT_NEAR(active - active0, 81920, 1000); + ASSERT_NEAR(metadata - metadata0, 384, 100); + ASSERT_NEAR(resident - resident0, 98304, 1000); + ASSERT_NEAR(mapped - mapped0, 81920, 1000); + ASSERT_NEAR(retained - retained0, 0, 0); - ASSERT_EQ(1280, thread_peak_read - thread_peak_read0); - ASSERT_EQ(2560, thread_allocated - thread_allocated0); - ASSERT_EQ(1280, thread_deallocated - thread_deallocated0); + ASSERT_EQ(thread_peak_read - thread_peak_read0, 1280); + ASSERT_EQ(thread_allocated - thread_allocated0, 2560); + ASSERT_EQ(thread_deallocated - thread_deallocated0, 1280); // Resetting thread peak read metric ASSERT_OK(pool->Allocate(12560, &data)); ASSERT_OK_AND_ASSIGN(thread_peak_read, jemalloc_get_stat("thread.peak.read")); - ASSERT_EQ(15616, thread_peak_read); + ASSERT_NEAR(thread_peak_read, 15616, 1000); ASSERT_OK(jemalloc_peak_reset()); ASSERT_OK(pool->Allocate(1256, &data)); ASSERT_OK_AND_ASSIGN(thread_peak_read, jemalloc_get_stat("thread.peak.read")); - ASSERT_EQ(1280, thread_peak_read); + ASSERT_NEAR(thread_peak_read, 1280, 100); + + // Print statistics to stderr + ASSERT_OK(jemalloc_stats_print(NULLPTR, NULLPTR, "J")); + + // Read statistics into std::string + ASSERT_OK_AND_ASSIGN(std::string stats, jemalloc_stats_print("Jax")); + ASSERT_EQ(stats.rfind("{\"jemalloc\":{\"version\"", 0), 0); - // Print statistics to stdout - ASSERT_OK(jemalloc_stats_print("ax")); #else ASSERT_RAISES(Invalid, jemalloc_get_stat("thread.peak.read")); ASSERT_RAISES(Invalid, jemalloc_get_stat("stats.allocated")); From 329149829761fa235ea0e5c55be7b1d05acf1e3d Mon Sep 17 00:00:00 2001 From: Rok Date: Tue, 13 Sep 2022 02:27:36 +0200 Subject: [PATCH 07/13] CI issues --- cpp/src/arrow/memory_pool.cc | 4 ++-- cpp/src/arrow/memory_pool.h | 3 ++- cpp/src/arrow/memory_pool_jemalloc.cc | 4 ++-- cpp/src/arrow/memory_pool_test.cc | 22 +++++++++++----------- 4 files changed, 17 insertions(+), 16 deletions(-) diff --git a/cpp/src/arrow/memory_pool.cc b/cpp/src/arrow/memory_pool.cc index 70819b9139c..cc0d94175d4 100644 --- a/cpp/src/arrow/memory_pool.cc +++ b/cpp/src/arrow/memory_pool.cc @@ -676,8 +676,8 @@ Result jemalloc_get_stat(const char* name) { Status jemalloc_peak_reset() { return Status::Invalid("jemalloc support is not built"); } -Status jemalloc_stats_print(void (*write_cb)(void*, const char*), void* cbopaque, - const char* opts) { +Status jemalloc_stats_print(std::function* write_cb, + void* cbopaque, const char* opts) { return Status::Invalid("jemalloc support is not built"); } diff --git a/cpp/src/arrow/memory_pool.h b/cpp/src/arrow/memory_pool.h index f5e25a202f6..b4ce225e7f7 100644 --- a/cpp/src/arrow/memory_pool.h +++ b/cpp/src/arrow/memory_pool.h @@ -19,6 +19,7 @@ #include #include +#include #include #include @@ -192,7 +193,7 @@ Status jemalloc_peak_reset(); /// See malloc_stats_print documentation in jemalloc project documentation for /// available opt flags. ARROW_EXPORT -Status jemalloc_stats_print(void (*write_cb)(void* opaque, const char* buf), +Status jemalloc_stats_print(std::function* write_cb, void* cbopaque, const char* opts = ""); /// \brief Get summary statistics in human-readable form. diff --git a/cpp/src/arrow/memory_pool_jemalloc.cc b/cpp/src/arrow/memory_pool_jemalloc.cc index f255062182d..bbdb7f6e54f 100644 --- a/cpp/src/arrow/memory_pool_jemalloc.cc +++ b/cpp/src/arrow/memory_pool_jemalloc.cc @@ -215,8 +215,8 @@ Result jemalloc_stats_print(const char* opts) { return stats; } -Status jemalloc_stats_print(void (*write_cb)(void*, const char*), void* cbopaque, - const char* opts) { +Status jemalloc_stats_print(std::function* write_cb, + void* cbopaque, const char* opts) { malloc_stats_print(reinterpret_cast(write_cb), cbopaque, opts); return Status::OK(); diff --git a/cpp/src/arrow/memory_pool_test.cc b/cpp/src/arrow/memory_pool_test.cc index d9c866b89a9..3e61e281227 100644 --- a/cpp/src/arrow/memory_pool_test.cc +++ b/cpp/src/arrow/memory_pool_test.cc @@ -197,10 +197,10 @@ TEST(Jemalloc, GetAllocationStats) { // Allocate memory ASSERT_OK(jemalloc_set_decay_ms(10000)); - ASSERT_OK(pool->Allocate(1256, &data)); - ASSERT_EQ(pool->bytes_allocated(), 1256); - ASSERT_OK(pool->Reallocate(1256, 1214, &data)); - ASSERT_EQ(pool->bytes_allocated(), 1214); + ASSERT_OK(pool->Allocate(1025, &data)); + ASSERT_EQ(pool->bytes_allocated(), 1025); + ASSERT_OK(pool->Reallocate(1025, 1023, &data)); + ASSERT_EQ(pool->bytes_allocated(), 1023); // Record stats after allocating ASSERT_OK_AND_ASSIGN(allocated, jemalloc_get_stat("stats.allocated")); @@ -222,15 +222,15 @@ TEST(Jemalloc, GetAllocationStats) { ASSERT_NEAR(retained0, 0, 100); // Check allocated stats change due to allocation - ASSERT_NEAR(allocated - allocated0, 81920, 1000); - ASSERT_NEAR(active - active0, 81920, 1000); - ASSERT_NEAR(metadata - metadata0, 384, 100); - ASSERT_NEAR(resident - resident0, 98304, 1000); - ASSERT_NEAR(mapped - mapped0, 81920, 1000); + ASSERT_NEAR(allocated - allocated0, 98304, 1000); + ASSERT_NEAR(active - active0, 98304, 1000); + ASSERT_NEAR(metadata - metadata0, 768, 100); + ASSERT_NEAR(resident - resident0, 114688, 1000); + ASSERT_NEAR(mapped - mapped0, 98304, 1000); ASSERT_NEAR(retained - retained0, 0, 0); - ASSERT_EQ(thread_peak_read - thread_peak_read0, 1280); - ASSERT_EQ(thread_allocated - thread_allocated0, 2560); + ASSERT_EQ(thread_peak_read - thread_peak_read0, 1024); + ASSERT_EQ(thread_allocated - thread_allocated0, 2304); ASSERT_EQ(thread_deallocated - thread_deallocated0, 1280); // Resetting thread peak read metric From f203a60632b70a1f6a393c0de592f772c80b555d Mon Sep 17 00:00:00 2001 From: Rok Date: Tue, 13 Sep 2022 14:43:07 +0200 Subject: [PATCH 08/13] Switching to std::string::append --- cpp/src/arrow/memory_pool_jemalloc.cc | 31 +++++---------------------- cpp/src/arrow/memory_pool_test.cc | 8 +++---- 2 files changed, 9 insertions(+), 30 deletions(-) diff --git a/cpp/src/arrow/memory_pool_jemalloc.cc b/cpp/src/arrow/memory_pool_jemalloc.cc index bbdb7f6e54f..4643e828011 100644 --- a/cpp/src/arrow/memory_pool_jemalloc.cc +++ b/cpp/src/arrow/memory_pool_jemalloc.cc @@ -185,33 +185,12 @@ Status jemalloc_peak_reset() { : Status::OK(); } -typedef struct { - char* buf; - size_t len; -} parser_t; - -void write_cb(void* opaque, const char* str) { - parser_t* parser = reinterpret_cast(opaque); - size_t len = strlen(str); - char* buf = (parser->buf == NULL) - ? reinterpret_cast(mallocx(len + 1, MALLOCX_TCACHE_NONE)) - : reinterpret_cast( - rallocx(parser->buf, parser->len + len + 1, MALLOCX_TCACHE_NONE)); - if (buf == NULL) { - ARROW_LOG(ERROR) << "Unexpected input appending failure"; - } - memcpy(&buf[parser->len], str, len + 1); - parser->buf = buf; - parser->len += len; -} - Result jemalloc_stats_print(const char* opts) { - parser_t parser = parser_t{NULL, 0}; - malloc_stats_print(write_cb, static_cast(&parser), opts); - std::string stats = parser.buf; - if (parser.buf != NULL) { - dallocx(parser.buf, MALLOCX_TCACHE_NONE); - } + std::string stats; + auto write_cb = [](void* opaque, const char* str) { + reinterpret_cast(opaque)->append(str); + }; + malloc_stats_print(write_cb, &stats, opts); return stats; } diff --git a/cpp/src/arrow/memory_pool_test.cc b/cpp/src/arrow/memory_pool_test.cc index 3e61e281227..f4858ce3deb 100644 --- a/cpp/src/arrow/memory_pool_test.cc +++ b/cpp/src/arrow/memory_pool_test.cc @@ -214,15 +214,15 @@ TEST(Jemalloc, GetAllocationStats) { ASSERT_OK_AND_ASSIGN(thread_peak_read, jemalloc_get_stat("thread.peak.read")); // Check allocated stats pre-allocation - ASSERT_NEAR(allocated0, 122624, 100000); - ASSERT_NEAR(active0, 131072, 10000); + ASSERT_NEAR(allocated0, 120000, 100000); + ASSERT_NEAR(active0, 75000, 70000); ASSERT_NEAR(metadata0, 3000000, 1000000); ASSERT_NEAR(resident0, 3000000, 1000000); ASSERT_NEAR(mapped0, 6500000, 1000000); - ASSERT_NEAR(retained0, 0, 100); + ASSERT_NEAR(retained0, 1500000, 1500000); // Check allocated stats change due to allocation - ASSERT_NEAR(allocated - allocated0, 98304, 1000); + ASSERT_NEAR(allocated - allocated0, 70000, 50000); ASSERT_NEAR(active - active0, 98304, 1000); ASSERT_NEAR(metadata - metadata0, 768, 100); ASSERT_NEAR(resident - resident0, 114688, 1000); From 98bfc2dedc638d6f05c120f4d560cf2639c58d44 Mon Sep 17 00:00:00 2001 From: Rok Date: Thu, 15 Sep 2022 15:18:11 +0200 Subject: [PATCH 09/13] windows compiler issues --- cpp/src/arrow/memory_pool.h | 1 + cpp/src/arrow/memory_pool_test.cc | 9 +++++---- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/cpp/src/arrow/memory_pool.h b/cpp/src/arrow/memory_pool.h index b4ce225e7f7..9d3e05ab0fb 100644 --- a/cpp/src/arrow/memory_pool.h +++ b/cpp/src/arrow/memory_pool.h @@ -199,6 +199,7 @@ Status jemalloc_stats_print(std::function* write_cb, /// \brief Get summary statistics in human-readable form. /// See malloc_stats_print documentation in jemalloc project documentation for /// available opt flags. +ARROW_EXPORT Result jemalloc_stats_print(const char* opts = ""); /// \brief Return a process-wide memory pool based on mimalloc. diff --git a/cpp/src/arrow/memory_pool_test.cc b/cpp/src/arrow/memory_pool_test.cc index f4858ce3deb..d58243cb3c0 100644 --- a/cpp/src/arrow/memory_pool_test.cc +++ b/cpp/src/arrow/memory_pool_test.cc @@ -223,10 +223,10 @@ TEST(Jemalloc, GetAllocationStats) { // Check allocated stats change due to allocation ASSERT_NEAR(allocated - allocated0, 70000, 50000); - ASSERT_NEAR(active - active0, 98304, 1000); - ASSERT_NEAR(metadata - metadata0, 768, 100); - ASSERT_NEAR(resident - resident0, 114688, 1000); - ASSERT_NEAR(mapped - mapped0, 98304, 1000); + ASSERT_NEAR(active - active0, 100000, 90000); + ASSERT_NEAR(metadata - metadata0, 400, 300); + ASSERT_NEAR(resident - resident0, 120000, 40000); + ASSERT_NEAR(mapped - mapped0, 100000, 50000); ASSERT_NEAR(retained - retained0, 0, 0); ASSERT_EQ(thread_peak_read - thread_peak_read0, 1024); @@ -255,6 +255,7 @@ TEST(Jemalloc, GetAllocationStats) { ASSERT_RAISES(Invalid, jemalloc_get_stat("stats.allocated")); ASSERT_RAISES(Invalid, jemalloc_get_stat("stats.allocatedp")); ASSERT_RAISES(Invalid, jemalloc_peak_reset()); + ASSERT_RAISES(Invalid, jemalloc_stats_print(NULLPTR, NULLPTR, "J")); ASSERT_RAISES(Invalid, jemalloc_stats_print("ax")); #endif } From 02d1c90ba71d6be4b1711f57466f7c2268c208ba Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Fri, 16 Sep 2022 12:52:31 +0200 Subject: [PATCH 10/13] Review feedback --- cpp/src/arrow/memory_pool.cc | 18 +++++----- cpp/src/arrow/memory_pool.h | 6 ++-- cpp/src/arrow/memory_pool_jemalloc.cc | 14 ++++---- cpp/src/arrow/memory_pool_test.cc | 51 +++++++++++++++++---------- 4 files changed, 52 insertions(+), 37 deletions(-) diff --git a/cpp/src/arrow/memory_pool.cc b/cpp/src/arrow/memory_pool.cc index cc0d94175d4..5b170e8527e 100644 --- a/cpp/src/arrow/memory_pool.cc +++ b/cpp/src/arrow/memory_pool.cc @@ -667,22 +667,24 @@ MemoryPool* default_memory_pool() { #ifndef ARROW_JEMALLOC Status jemalloc_set_decay_ms(int ms) { - return Status::Invalid("jemalloc support is not built"); + return Status::NotImplemented("jemalloc support is not built"); } -Result jemalloc_get_stat(const char* name) { - return Status::Invalid("jemalloc support is not built"); +Result jemalloc_get_stat(const char* name) { + return Status::NotImplemented("jemalloc support is not built"); } -Status jemalloc_peak_reset() { return Status::Invalid("jemalloc support is not built"); } +Status jemalloc_peak_reset() { + return Status::NotImplemented("jemalloc support is not built"); +} -Status jemalloc_stats_print(std::function* write_cb, - void* cbopaque, const char* opts) { - return Status::Invalid("jemalloc support is not built"); +Status jemalloc_stats_print(void (*write_cb)(void*, const char*), void* cbopaque, + const char* opts) { + return Status::NotImplemented("jemalloc support is not built"); } Result jemalloc_stats_print(const char* opts) { - return Status::Invalid("jemalloc support is not built"); + return Status::NotImplemented("jemalloc support is not built"); } #endif diff --git a/cpp/src/arrow/memory_pool.h b/cpp/src/arrow/memory_pool.h index 9d3e05ab0fb..6ccb0acf6f6 100644 --- a/cpp/src/arrow/memory_pool.h +++ b/cpp/src/arrow/memory_pool.h @@ -181,7 +181,7 @@ Status jemalloc_set_decay_ms(int ms); /// See the MALLCTL NAMESPACE section in jemalloc project documentation for /// available stats. ARROW_EXPORT -Result jemalloc_get_stat(const char* name); +Result jemalloc_get_stat(const char* name); /// \brief Reset the counter for peak bytes allocated in the calling thread to zero. /// This affects subsequent calls to thread.peak.read, but not the values returned by @@ -193,8 +193,8 @@ Status jemalloc_peak_reset(); /// See malloc_stats_print documentation in jemalloc project documentation for /// available opt flags. ARROW_EXPORT -Status jemalloc_stats_print(std::function* write_cb, - void* cbopaque, const char* opts = ""); +Status jemalloc_stats_print(void (*write_cb)(void*, const char*), void* cbopaque, + const char* opts = ""); /// \brief Get summary statistics in human-readable form. /// See malloc_stats_print documentation in jemalloc project documentation for diff --git a/cpp/src/arrow/memory_pool_jemalloc.cc b/cpp/src/arrow/memory_pool_jemalloc.cc index 4643e828011..97e4a519eec 100644 --- a/cpp/src/arrow/memory_pool_jemalloc.cc +++ b/cpp/src/arrow/memory_pool_jemalloc.cc @@ -155,11 +155,12 @@ Status jemalloc_set_decay_ms(int ms) { #undef RETURN_IF_JEMALLOC_ERROR #ifdef ARROW_JEMALLOC -Result jemalloc_get_stat(const char* name) { +Result jemalloc_get_stat(const char* name) { size_t sz = sizeof(uint64_t); int err; uint64_t value; + // Update the statistics cached by mallctl. if (std::strcmp(name, "stats.allocated") == 0 || std::strcmp(name, "stats.active") == 0 || std::strcmp(name, "stats.metadata") == 0 || @@ -170,7 +171,7 @@ Result jemalloc_get_stat(const char* name) { mallctl("epoch", &epoch, &sz, &epoch, sz); } - err = mallctl(name, &value, &sz, NULLPTR, 0); + err = mallctl(name, &value, &sz, nullptr, 0); if (err) { return arrow::internal::IOErrorFromErrno(err, "Failed retrieving ", &name); @@ -180,7 +181,7 @@ Result jemalloc_get_stat(const char* name) { } Status jemalloc_peak_reset() { - int err = mallctl("thread.peak.reset", NULLPTR, NULLPTR, NULLPTR, 0); + int err = mallctl("thread.peak.reset", nullptr, nullptr, nullptr, 0); return err ? arrow::internal::IOErrorFromErrno(err, "Failed resetting thread.peak.") : Status::OK(); } @@ -194,10 +195,9 @@ Result jemalloc_stats_print(const char* opts) { return stats; } -Status jemalloc_stats_print(std::function* write_cb, - void* cbopaque, const char* opts) { - malloc_stats_print(reinterpret_cast(write_cb), cbopaque, - opts); +Status jemalloc_stats_print(void (*write_cb)(void*, const char*), void* cbopaque, + const char* opts) { + malloc_stats_print(write_cb, cbopaque, opts); return Status::OK(); } #endif diff --git a/cpp/src/arrow/memory_pool_test.cc b/cpp/src/arrow/memory_pool_test.cc index d58243cb3c0..3bd297a64d6 100644 --- a/cpp/src/arrow/memory_pool_test.cc +++ b/cpp/src/arrow/memory_pool_test.cc @@ -169,16 +169,16 @@ TEST(Jemalloc, SetDirtyPageDecayMillis) { #ifdef ARROW_JEMALLOC ASSERT_OK(jemalloc_set_decay_ms(0)); #else - ASSERT_RAISES(Invalid, jemalloc_set_decay_ms(0)); + ASSERT_RAISES(NotImplemented, jemalloc_set_decay_ms(0)); #endif } TEST(Jemalloc, GetAllocationStats) { #ifdef ARROW_JEMALLOC uint8_t* data; - uint64_t allocated, active, metadata, resident, mapped, retained, allocated0, active0, + int64_t allocated, active, metadata, resident, mapped, retained, allocated0, active0, metadata0, resident0, mapped0, retained0; - uint64_t thread_allocated, thread_deallocated, thread_peak_read, thread_allocated0, + int64_t thread_allocated, thread_deallocated, thread_peak_read, thread_allocated0, thread_deallocated0, thread_peak_read0; auto pool = default_memory_pool(); ABORT_NOT_OK(jemalloc_memory_pool(&pool)); @@ -219,18 +219,18 @@ TEST(Jemalloc, GetAllocationStats) { ASSERT_NEAR(metadata0, 3000000, 1000000); ASSERT_NEAR(resident0, 3000000, 1000000); ASSERT_NEAR(mapped0, 6500000, 1000000); - ASSERT_NEAR(retained0, 1500000, 1500000); + ASSERT_NEAR(retained0, 1500000, 2000000); // Check allocated stats change due to allocation ASSERT_NEAR(allocated - allocated0, 70000, 50000); ASSERT_NEAR(active - active0, 100000, 90000); - ASSERT_NEAR(metadata - metadata0, 400, 300); - ASSERT_NEAR(resident - resident0, 120000, 40000); - ASSERT_NEAR(mapped - mapped0, 100000, 50000); - ASSERT_NEAR(retained - retained0, 0, 0); + ASSERT_NEAR(metadata - metadata0, 500, 460); + ASSERT_NEAR(resident - resident0, 120000, 110000); + ASSERT_NEAR(mapped - mapped0, 100000, 90000); + ASSERT_NEAR(retained - retained0, 0, 40000); - ASSERT_EQ(thread_peak_read - thread_peak_read0, 1024); - ASSERT_EQ(thread_allocated - thread_allocated0, 2304); + ASSERT_NEAR(thread_peak_read - thread_peak_read0, 1024, 700); + ASSERT_NEAR(thread_allocated - thread_allocated0, 2500, 500); ASSERT_EQ(thread_deallocated - thread_deallocated0, 1280); // Resetting thread peak read metric @@ -243,20 +243,33 @@ TEST(Jemalloc, GetAllocationStats) { ASSERT_NEAR(thread_peak_read, 1280, 100); // Print statistics to stderr - ASSERT_OK(jemalloc_stats_print(NULLPTR, NULLPTR, "J")); + ASSERT_OK(jemalloc_stats_print(nullptr, nullptr, "J")); // Read statistics into std::string ASSERT_OK_AND_ASSIGN(std::string stats, jemalloc_stats_print("Jax")); - ASSERT_EQ(stats.rfind("{\"jemalloc\":{\"version\"", 0), 0); + // Read statistics into std::string with a lambda + std::string stats2; + auto write_cb = [](void* opaque, const char* str) { + reinterpret_cast(opaque)->append(str); + }; + ASSERT_OK(jemalloc_stats_print(write_cb, &stats2, "Jax")); + + ASSERT_EQ(stats.rfind("{\"jemalloc\":{\"version\"", 0), 0); + ASSERT_EQ(stats2.rfind("{\"jemalloc\":{\"version\"", 0), 0); + ASSERT_EQ(stats.substr(0, 100), stats2.substr(0, 100)); #else - ASSERT_RAISES(Invalid, jemalloc_get_stat("thread.peak.read")); - ASSERT_RAISES(Invalid, jemalloc_get_stat("stats.allocated")); - ASSERT_RAISES(Invalid, jemalloc_get_stat("stats.allocated")); - ASSERT_RAISES(Invalid, jemalloc_get_stat("stats.allocatedp")); - ASSERT_RAISES(Invalid, jemalloc_peak_reset()); - ASSERT_RAISES(Invalid, jemalloc_stats_print(NULLPTR, NULLPTR, "J")); - ASSERT_RAISES(Invalid, jemalloc_stats_print("ax")); + std::string stats; + auto write_cb = [](void* opaque, const char* str) { + reinterpret_cast(opaque)->append(str); + }; + ASSERT_RAISES(NotImplemented, jemalloc_get_stat("thread.peak.read")); + ASSERT_RAISES(NotImplemented, jemalloc_get_stat("stats.allocated")); + ASSERT_RAISES(NotImplemented, jemalloc_get_stat("stats.allocated")); + ASSERT_RAISES(NotImplemented, jemalloc_get_stat("stats.allocatedp")); + ASSERT_RAISES(NotImplemented, jemalloc_peak_reset()); + ASSERT_RAISES(NotImplemented, jemalloc_stats_print(write_cb, &stats, "Jax")); + ASSERT_RAISES(NotImplemented, jemalloc_stats_print("ax")); #endif } From 1af3ef542a2e207d367c0d400f09e33e601d3de0 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Tue, 20 Sep 2022 03:31:53 +0200 Subject: [PATCH 11/13] Apply suggestions from code review Co-authored-by: Ben Harkins <60872452+benibus@users.noreply.github.com> --- cpp/src/arrow/memory_pool.cc | 9 ++++++--- cpp/src/arrow/memory_pool.h | 10 ++++++++-- cpp/src/arrow/memory_pool_jemalloc.cc | 23 +++++++++++++++-------- cpp/src/arrow/memory_pool_test.cc | 16 ++++++---------- 4 files changed, 35 insertions(+), 23 deletions(-) diff --git a/cpp/src/arrow/memory_pool.cc b/cpp/src/arrow/memory_pool.cc index 5b170e8527e..638bbb3ab7f 100644 --- a/cpp/src/arrow/memory_pool.cc +++ b/cpp/src/arrow/memory_pool.cc @@ -678,12 +678,15 @@ Status jemalloc_peak_reset() { return Status::NotImplemented("jemalloc support is not built"); } -Status jemalloc_stats_print(void (*write_cb)(void*, const char*), void* cbopaque, - const char* opts) { +Status jemalloc_stats_print(const char* opts) { return Status::NotImplemented("jemalloc support is not built"); } -Result jemalloc_stats_print(const char* opts) { +Status jemalloc_stats_print(std::function write_cb, const char* opts) { + return Status::NotImplemented("jemalloc support is not built"); +} + +Result jemalloc_stats_string(const char* opts) { return Status::NotImplemented("jemalloc support is not built"); } diff --git a/cpp/src/arrow/memory_pool.h b/cpp/src/arrow/memory_pool.h index 6ccb0acf6f6..dba55268d69 100644 --- a/cpp/src/arrow/memory_pool.h +++ b/cpp/src/arrow/memory_pool.h @@ -193,14 +193,20 @@ Status jemalloc_peak_reset(); /// See malloc_stats_print documentation in jemalloc project documentation for /// available opt flags. ARROW_EXPORT -Status jemalloc_stats_print(void (*write_cb)(void*, const char*), void* cbopaque, +Status jemalloc_stats_print(const char* opts = ""); + +/// \brief Print summary statistics in human-readable form using a callback +/// See malloc_stats_print documentation in jemalloc project documentation for +/// available opt flags. +ARROW_EXPORT +Status jemalloc_stats_print(std::function write_cb, const char* opts = ""); /// \brief Get summary statistics in human-readable form. /// See malloc_stats_print documentation in jemalloc project documentation for /// available opt flags. ARROW_EXPORT -Result jemalloc_stats_print(const char* opts = ""); +Result jemalloc_stats_string(const char* opts = ""); /// \brief Return a process-wide memory pool based on mimalloc. /// diff --git a/cpp/src/arrow/memory_pool_jemalloc.cc b/cpp/src/arrow/memory_pool_jemalloc.cc index 97e4a519eec..75cfe408635 100644 --- a/cpp/src/arrow/memory_pool_jemalloc.cc +++ b/cpp/src/arrow/memory_pool_jemalloc.cc @@ -186,18 +186,25 @@ Status jemalloc_peak_reset() { : Status::OK(); } -Result jemalloc_stats_print(const char* opts) { +Result jemalloc_stats_string(const char* opts) { std::string stats; - auto write_cb = [](void* opaque, const char* str) { - reinterpret_cast(opaque)->append(str); - }; - malloc_stats_print(write_cb, &stats, opts); + auto write_cb = [&stats](const char* str) { stats.append(str); }; + ARROW_UNUSED(jemalloc_stats_print(write_cb, opts)); return stats; } -Status jemalloc_stats_print(void (*write_cb)(void*, const char*), void* cbopaque, - const char* opts) { - malloc_stats_print(write_cb, cbopaque, opts); +Status jemalloc_stats_print(const char* opts) { + malloc_stats_print(nullptr, nullptr, opts); + return Status::OK(); +} + +Status jemalloc_stats_print(std::function write_cb, const char* opts) { + auto cb_wrapper = [](void* opaque, const char* str) { + (*static_cast*>(opaque))(str); + }; + if (write_cb) { + malloc_stats_print(cb_wrapper, &write_cb, opts); + } return Status::OK(); } #endif diff --git a/cpp/src/arrow/memory_pool_test.cc b/cpp/src/arrow/memory_pool_test.cc index 3bd297a64d6..16a5f60f796 100644 --- a/cpp/src/arrow/memory_pool_test.cc +++ b/cpp/src/arrow/memory_pool_test.cc @@ -243,32 +243,28 @@ TEST(Jemalloc, GetAllocationStats) { ASSERT_NEAR(thread_peak_read, 1280, 100); // Print statistics to stderr - ASSERT_OK(jemalloc_stats_print(nullptr, nullptr, "J")); + ASSERT_OK(jemalloc_stats_print("J")); // Read statistics into std::string - ASSERT_OK_AND_ASSIGN(std::string stats, jemalloc_stats_print("Jax")); + ASSERT_OK_AND_ASSIGN(std::string stats, jemalloc_stats_string("Jax")); // Read statistics into std::string with a lambda std::string stats2; - auto write_cb = [](void* opaque, const char* str) { - reinterpret_cast(opaque)->append(str); - }; - ASSERT_OK(jemalloc_stats_print(write_cb, &stats2, "Jax")); + auto write_cb = [&stats2](const char* str) { stats2.append(str); }; + ASSERT_OK(jemalloc_stats_print(write_cb, "Jax")); ASSERT_EQ(stats.rfind("{\"jemalloc\":{\"version\"", 0), 0); ASSERT_EQ(stats2.rfind("{\"jemalloc\":{\"version\"", 0), 0); ASSERT_EQ(stats.substr(0, 100), stats2.substr(0, 100)); #else std::string stats; - auto write_cb = [](void* opaque, const char* str) { - reinterpret_cast(opaque)->append(str); - }; + auto write_cb = [&stats](const char* str) { stats.append(str); }; ASSERT_RAISES(NotImplemented, jemalloc_get_stat("thread.peak.read")); ASSERT_RAISES(NotImplemented, jemalloc_get_stat("stats.allocated")); ASSERT_RAISES(NotImplemented, jemalloc_get_stat("stats.allocated")); ASSERT_RAISES(NotImplemented, jemalloc_get_stat("stats.allocatedp")); ASSERT_RAISES(NotImplemented, jemalloc_peak_reset()); - ASSERT_RAISES(NotImplemented, jemalloc_stats_print(write_cb, &stats, "Jax")); + ASSERT_RAISES(NotImplemented, jemalloc_stats_print(write_cb, "Jax")); ASSERT_RAISES(NotImplemented, jemalloc_stats_print("ax")); #endif } From 5c2eefcc8a612db7f24556e8d801a78e8944f36c Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Wed, 21 Sep 2022 13:01:54 +0200 Subject: [PATCH 12/13] Update cpp/src/arrow/memory_pool_test.cc Co-authored-by: Antoine Pitrou --- cpp/src/arrow/memory_pool_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/arrow/memory_pool_test.cc b/cpp/src/arrow/memory_pool_test.cc index 16a5f60f796..f8aff09f399 100644 --- a/cpp/src/arrow/memory_pool_test.cc +++ b/cpp/src/arrow/memory_pool_test.cc @@ -180,7 +180,7 @@ TEST(Jemalloc, GetAllocationStats) { metadata0, resident0, mapped0, retained0; int64_t thread_allocated, thread_deallocated, thread_peak_read, thread_allocated0, thread_deallocated0, thread_peak_read0; - auto pool = default_memory_pool(); + MemoryPool* pool = nullptr; ABORT_NOT_OK(jemalloc_memory_pool(&pool)); ASSERT_EQ("jemalloc", pool->backend_name()); From 8f3b32f0a6dac9d9e6e16a08191a2621cb752b98 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Wed, 21 Sep 2022 13:06:23 +0200 Subject: [PATCH 13/13] Review feedback --- cpp/src/arrow/memory_pool_jemalloc.cc | 6 +----- cpp/src/arrow/memory_pool_test.cc | 1 - 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/cpp/src/arrow/memory_pool_jemalloc.cc b/cpp/src/arrow/memory_pool_jemalloc.cc index 75cfe408635..03d2b28ee3e 100644 --- a/cpp/src/arrow/memory_pool_jemalloc.cc +++ b/cpp/src/arrow/memory_pool_jemalloc.cc @@ -154,7 +154,6 @@ Status jemalloc_set_decay_ms(int ms) { #undef RETURN_IF_JEMALLOC_ERROR -#ifdef ARROW_JEMALLOC Result jemalloc_get_stat(const char* name) { size_t sz = sizeof(uint64_t); int err; @@ -202,11 +201,8 @@ Status jemalloc_stats_print(std::function write_cb, const cha auto cb_wrapper = [](void* opaque, const char* str) { (*static_cast*>(opaque))(str); }; - if (write_cb) { - malloc_stats_print(cb_wrapper, &write_cb, opts); - } + malloc_stats_print(cb_wrapper, &write_cb, opts); return Status::OK(); } -#endif } // namespace arrow diff --git a/cpp/src/arrow/memory_pool_test.cc b/cpp/src/arrow/memory_pool_test.cc index f8aff09f399..5ac14a44b9a 100644 --- a/cpp/src/arrow/memory_pool_test.cc +++ b/cpp/src/arrow/memory_pool_test.cc @@ -196,7 +196,6 @@ TEST(Jemalloc, GetAllocationStats) { ASSERT_OK_AND_ASSIGN(thread_peak_read0, jemalloc_get_stat("thread.peak.read")); // Allocate memory - ASSERT_OK(jemalloc_set_decay_ms(10000)); ASSERT_OK(pool->Allocate(1025, &data)); ASSERT_EQ(pool->bytes_allocated(), 1025); ASSERT_OK(pool->Reallocate(1025, 1023, &data));