From a9964b135b3a351cf1002491d343581b46f0204d Mon Sep 17 00:00:00 2001 From: Max Krasnyansky Date: Mon, 13 Apr 2026 17:40:07 -0700 Subject: [PATCH 1/9] hexagon: allow host to set max vmem size We use a sane default but it's helpful to allow for an override if needed. --- ggml/src/ggml-hexagon/ggml-hexagon.cpp | 69 ++++++++++++++++--------- ggml/src/ggml-hexagon/htp/htp-ctx.h | 2 + ggml/src/ggml-hexagon/htp/htp-ops.h | 4 +- ggml/src/ggml-hexagon/htp/htp_iface.idl | 2 +- ggml/src/ggml-hexagon/htp/main.c | 6 +-- 5 files changed, 53 insertions(+), 30 deletions(-) diff --git a/ggml/src/ggml-hexagon/ggml-hexagon.cpp b/ggml/src/ggml-hexagon/ggml-hexagon.cpp index 0d9b5e289bb..45377f729b4 100644 --- a/ggml/src/ggml-hexagon/ggml-hexagon.cpp +++ b/ggml/src/ggml-hexagon/ggml-hexagon.cpp @@ -44,6 +44,7 @@ #include "htp_iface.h" #include "htp-drv.h" +<<<<<<< HEAD using intvec = std::vector; using uintvec = std::vector; using u32vec = std::vector; @@ -66,6 +67,22 @@ static u32vec opt_pmu_evt { 0x3, 0x111, 0x100, 0x105, 0x240, 0x256, 0x7D, 0x8C } static int opt_opstage = HTP_OPSTAGE_QUEUE | HTP_OPSTAGE_COMPUTE; static int opt_opbatch = 1024; // max number of ops in a batch static int opt_opqueue = 16; // max number of pending batches +======= +static size_t opt_ndev = 1; +static size_t opt_nhvx = 0; // use all +static size_t opt_vmem = HTP_OP_MAX_VMEM_DEFAULT; // max available va space for buffer mappings +static int opt_arch = 0; // autodetect +static int opt_etm = 0; +static int opt_verbose = 0; +static int opt_profile = 0; +static int opt_hostbuf = 1; // hostbuf ON by default +static int opt_use_hmx = 1; // when set, enable HMX; when 0, use HVX only +static int opt_opmask = HTP_OPMASK_QUEUE | HTP_OPMASK_COMPUTE; // Enable all stages by default +static int opt_opsync = 0; // synchronous ops +static int opt_opbatch = 1024; // max number of ops in a batch +static int opt_opqueue = 16; // max number of pending batches + +>>>>>>> 6ee78ca87 (hexagon: allow host to set max vmem size) static std::regex* opt_opfilter = NULL; // regex of ops to not claim #define HEX_VERBOSE(...) \ @@ -1580,7 +1597,7 @@ struct ggml_hexagon_opbatch { n_ops_max = batch_size; n_tens_max = n_ops_max + n_ops_max * HTP_OP_MAX_INPUTS; - b_vmem_max = HTP_OP_MAX_VMEM; + b_vmem_max = opt_vmem; ops.resize(n_ops_max); @@ -2095,7 +2112,7 @@ void ggml_hexagon_session::allocate(int dev_id) noexcept(false) { this->op_queue = new ggml_hexagon_opqueue(this, opt_opbatch, opt_opqueue); // Start processing op batch requests - err = htp_iface_start(this->handle, dev_id, this->queue_id, opt_nhvx, opt_use_hmx); + err = htp_iface_start(this->handle, dev_id, this->queue_id, opt_nhvx, opt_use_hmx, opt_vmem); if (err != 0) { GGML_LOG_ERROR("ggml-hex: failed to start session: 0x%08x\n", (unsigned) err); throw std::runtime_error("ggml-hex: iface start failed (see log for details)"); @@ -3492,20 +3509,35 @@ static void ggml_hexagon_init(ggml_backend_reg * reg) { const char * str_use_hmx = getenv("GGML_HEXAGON_USE_HMX"); const char * str_ndev = getenv("GGML_HEXAGON_NDEV"); const char * str_arch = getenv("GGML_HEXAGON_ARCH"); + const char * str_vmem = getenv("GGML_HEXAGON_VMEM"); auto RE_ICASE = std::regex_constants::icase; opt_opfilter = str_opfilter ? new std::regex(str_opfilter, RE_ICASE) : NULL; - opt_verbose = str_verbose ? atoi(str_verbose) : 0; - opt_hostbuf = str_hostbuf ? atoi(str_hostbuf) : opt_hostbuf; - opt_opstage = str_opstage ? strtoul(str_opstage, NULL, 0) : opt_opstage; - opt_opbatch = str_opbatch ? strtoul(str_opbatch, NULL, 0) : opt_opbatch; - opt_opqueue = str_opqueue ? strtoul(str_opqueue, NULL, 0) : opt_opqueue; - opt_etm = str_etm ? atoi(str_etm) : 0; - opt_nhvx = str_nhvx ? strtoul(str_nhvx, NULL, 0) : opt_nhvx; - opt_use_hmx = str_use_hmx ? atoi(str_use_hmx) : opt_use_hmx; - opt_ndev = str_ndev ? strtoul(str_ndev, NULL, 0) : opt_ndev; - opt_hostbuf = str_hostbuf ? atoi(str_hostbuf) : opt_hostbuf; + opt_verbose = str_verbose ? atoi(str_verbose) : 0; + opt_hostbuf = str_hostbuf ? atoi(str_hostbuf) : opt_hostbuf; + opt_opmask = str_opmask ? strtoul(str_opmask, NULL, 0) : opt_opmask; + opt_opsync = str_opsync ? atoi(str_opsync) : opt_opsync; + opt_opbatch = str_opbatch ? strtoul(str_opbatch, NULL, 0) : opt_opbatch; + opt_opqueue = str_opqueue ? strtoul(str_opqueue, NULL, 0) : opt_opqueue; + opt_profile = str_profile ? atoi(str_profile) : 0; + opt_etm = str_etm ? atoi(str_etm) : 0; + opt_nhvx = str_nhvx ? strtoul(str_nhvx, NULL, 0) : opt_nhvx; + opt_use_hmx = str_use_hmx ? atoi(str_use_hmx) : opt_use_hmx; + opt_ndev = str_ndev ? strtoul(str_ndev, NULL, 0) : opt_ndev; + opt_hostbuf = str_hostbuf ? atoi(str_hostbuf) : opt_hostbuf; + opt_vmem = str_vmem ? strtoul(str_vmem, NULL, 0) : opt_vmem; + + if (opt_ndev > GGML_HEXAGON_MAX_SESSIONS) { + opt_ndev = GGML_HEXAGON_MAX_SESSIONS; + } + + if (str_arch) { + if (str_arch[0] == 'v') { + str_arch++; + } + opt_arch = strtoul(str_arch, NULL, 0); + } if (str_profile) { opt_pmu_evt = [&]() -> std::vector { @@ -3519,18 +3551,7 @@ static void ggml_hexagon_init(ggml_backend_reg * reg) { GGML_LOG_INFO("ggml-hex: Profiling mode %u : pmu-evt [ %s ]\n", opt_profile, vec_to_str(opt_pmu_evt).c_str()); } - - if (opt_ndev > GGML_HEXAGON_MAX_SESSIONS) { - opt_ndev = GGML_HEXAGON_MAX_SESSIONS; - } - - if (str_arch) { - if (str_arch[0] == 'v') { - str_arch++; - } - opt_arch = strtoul(str_arch, NULL, 0); - } - + reg->context = new ggml_hexagon_registry(reg); } diff --git a/ggml/src/ggml-hexagon/htp/htp-ctx.h b/ggml/src/ggml-hexagon/htp/htp-ctx.h index d704fedee9d..ccdf790d0f4 100644 --- a/ggml/src/ggml-hexagon/htp/htp-ctx.h +++ b/ggml/src/ggml-hexagon/htp/htp-ctx.h @@ -77,6 +77,8 @@ struct htp_context { atomic_bool vtcm_valid; atomic_bool vtcm_needs_release; + uint64_t max_vmem; + struct htp_ops_context octx; #ifdef HTP_HAS_HMX diff --git a/ggml/src/ggml-hexagon/htp/htp-ops.h b/ggml/src/ggml-hexagon/htp/htp-ops.h index 4397245c5b8..9bf1b20b276 100644 --- a/ggml/src/ggml-hexagon/htp/htp-ops.h +++ b/ggml/src/ggml-hexagon/htp/htp-ops.h @@ -95,9 +95,9 @@ enum htp_op_code { #define HTP_OP_MAX_TENSORS (HTP_OP_MAX_REQS * HTP_OP_MAX_INPUTS + HTP_OP_MAX_REQS) #if __HVX_ARCH__ < 75 -#define HTP_OP_MAX_VMEM (3167538380u) +#define HTP_OP_MAX_VMEM_DEFAULT (3167538380u) #else -#define HTP_OP_MAX_VMEM (3221225472u) +#define HTP_OP_MAX_VMEM_DEFAULT (3355443200u) #endif #define HTP_MMAP_MAX_VMEM (2147483648u) diff --git a/ggml/src/ggml-hexagon/htp/htp_iface.idl b/ggml/src/ggml-hexagon/htp/htp_iface.idl index dbcafd1d856..f92e379e0ab 100644 --- a/ggml/src/ggml-hexagon/htp/htp_iface.idl +++ b/ggml/src/ggml-hexagon/htp/htp_iface.idl @@ -11,7 +11,7 @@ struct htp_iface_pmu_conf { }; interface htp_iface : remote_handle64 { - AEEResult start(in uint32 sess_id, in uint64 dsp_queue_id, in uint32 n_hvx, in uint32 use_hmx); + AEEResult start(in uint32 sess_id, in uint64 dsp_queue_id, in uint32 n_hvx, in uint32 use_hmx, in uint64 max_vmem); AEEResult stop(); AEEResult mmap(in uint32 fd, in uint32 size, in uint32 pinned); AEEResult munmap(in uint32 fd); diff --git a/ggml/src/ggml-hexagon/htp/main.c b/ggml/src/ggml-hexagon/htp/main.c index f58347304be..861ac33a89b 100644 --- a/ggml/src/ggml-hexagon/htp/main.c +++ b/ggml/src/ggml-hexagon/htp/main.c @@ -358,7 +358,7 @@ static void vtcm_free(struct htp_context * ctx) { static void htp_packet_callback(dspqueue_t queue, int error, void * context); static void htp_error_callback(dspqueue_t queue, int error, void * context); -AEEResult htp_iface_start(remote_handle64 handle, uint32 sess_id, uint64 dsp_queue_id, uint32 n_hvx, uint32 use_hmx) { +AEEResult htp_iface_start(remote_handle64 handle, uint32 sess_id, uint64 dsp_queue_id, uint32 n_hvx, uint32 use_hmx, uint64_t max_vmem) { struct htp_context * ctx = (struct htp_context *) handle; if (!ctx) { @@ -376,12 +376,12 @@ AEEResult htp_iface_start(remote_handle64 handle, uint32 sess_id, uint64 dsp_que htp_error_callback, // Error callback; no errors expected on the DSP (void *) ctx, // Callback context &ctx->queue); - if (err) { FARF(ERROR, "Queue import failed with 0x%08x", (unsigned) err); return err; } + ctx->max_vmem = max_vmem; ctx->thread_id = qurt_thread_get_id(); ctx->thread_prio = qurt_thread_get_priority(ctx->thread_id); @@ -689,7 +689,7 @@ static void prep_op_bufs(struct htp_context *ctx, struct htp_buf_desc *bufs, uin FARF(HIGH, "prep-bufs : pass1 mmap-vmem %zu extra-vmem %zu n-bufs %u b-reuse %u", m_vmem, e_vmem, n_bufs, b_reuse); - if ((m_vmem + e_vmem) > HTP_OP_MAX_VMEM) { + if ((m_vmem + e_vmem) > ctx->max_vmem) { // Drop unused mappings for (uint32_t i=0; i < HTP_MAX_MMAPS; i++) { bool used = m_reuse & (1< Date: Mon, 13 Apr 2026 21:15:37 -0700 Subject: [PATCH 2/9] hexagon: add support for measuring vmem space and move pinned mmaping management to host --- ggml/src/ggml-hexagon/ggml-hexagon.cpp | 114 ++++++++++++----------- ggml/src/ggml-hexagon/htp/htp-ctx.h | 2 +- ggml/src/ggml-hexagon/htp/htp_iface.idl | 2 +- ggml/src/ggml-hexagon/htp/main.c | 14 +-- scripts/snapdragon/adb/run-cli.sh | 8 +- scripts/snapdragon/adb/run-completion.sh | 8 +- 6 files changed, 82 insertions(+), 66 deletions(-) diff --git a/ggml/src/ggml-hexagon/ggml-hexagon.cpp b/ggml/src/ggml-hexagon/ggml-hexagon.cpp index 45377f729b4..f60cc3a3603 100644 --- a/ggml/src/ggml-hexagon/ggml-hexagon.cpp +++ b/ggml/src/ggml-hexagon/ggml-hexagon.cpp @@ -44,19 +44,19 @@ #include "htp_iface.h" #include "htp-drv.h" -<<<<<<< HEAD using intvec = std::vector; using uintvec = std::vector; using u32vec = std::vector; -static size_t opt_ndev = 1; -static size_t opt_nhvx = 0; // use all -static int opt_arch = 0; // autodetect -static int opt_etm = 0; -static int opt_verbose = 0; -static int opt_profile = 0; // profiling mode (0-disabled, 1-basic, 2-pmu) -static int opt_hostbuf = 1; // hostbuf ON by default -static int opt_use_hmx = 1; // when set, enable HMX; when 0, use HVX only +static size_t opt_ndev = 1; +static size_t opt_nhvx = 0; // use all +static int opt_arch = 0; // autodetect +static int opt_etm = 0; +static int opt_verbose = 0; +static int opt_profile = 0; // profiling mode (0-disabled, 1-basic, 2-pmu) +static int opt_hostbuf = 1; // hostbuf ON by default +static size_t opt_vmem = HTP_OP_MAX_VMEM_DEFAULT; // max available va space for buffer mappings +static int opt_use_hmx = 1; // when set, enable HMX; when 0, use HVX only // Default PMU events, if profiling with PMU (mode=2) is enabled // See https://docs.qualcomm.com/doc/80-N2040-60/topic/pmu-events.html @@ -67,22 +67,7 @@ static u32vec opt_pmu_evt { 0x3, 0x111, 0x100, 0x105, 0x240, 0x256, 0x7D, 0x8C } static int opt_opstage = HTP_OPSTAGE_QUEUE | HTP_OPSTAGE_COMPUTE; static int opt_opbatch = 1024; // max number of ops in a batch static int opt_opqueue = 16; // max number of pending batches -======= -static size_t opt_ndev = 1; -static size_t opt_nhvx = 0; // use all -static size_t opt_vmem = HTP_OP_MAX_VMEM_DEFAULT; // max available va space for buffer mappings -static int opt_arch = 0; // autodetect -static int opt_etm = 0; -static int opt_verbose = 0; -static int opt_profile = 0; -static int opt_hostbuf = 1; // hostbuf ON by default -static int opt_use_hmx = 1; // when set, enable HMX; when 0, use HVX only -static int opt_opmask = HTP_OPMASK_QUEUE | HTP_OPMASK_COMPUTE; // Enable all stages by default -static int opt_opsync = 0; // synchronous ops -static int opt_opbatch = 1024; // max number of ops in a batch -static int opt_opqueue = 16; // max number of pending batches - ->>>>>>> 6ee78ca87 (hexagon: allow host to set max vmem size) + // static std::regex* opt_opfilter = NULL; // regex of ops to not claim #define HEX_VERBOSE(...) \ @@ -208,33 +193,30 @@ struct ggml_hexagon_shared_buffer { bool mapped; bool pinned; - void mmap(bool pinned = false) { - int err = fastrpc_mmap(sess->domain_id, this->fd, (void *) this->base, 0, this->size, FASTRPC_MAP_FD_DELAYED); + void mmap() { + fastrpc_map_flags flags = this->pinned ? FASTRPC_MAP_FD : FASTRPC_MAP_FD_DELAYED; + + int err = fastrpc_mmap(sess->domain_id, this->fd, (void *) this->base, 0, this->size, flags); if (err != 0) { GGML_LOG_ERROR("ggml-hex: %s buffer mapping failed : domain_id %d size %zu fd %d error 0x%08x\n", sess->c_name(), sess->domain_id, this->size, this->fd, (unsigned) err); throw std::runtime_error("ggml-hex: fastrpc_mmap failed (see log for details)"); } - if (pinned) { - err = htp_iface_mmap(sess->handle, this->fd, this->size, pinned); - if (err != 0) { - GGML_LOG_ERROR("ggml-hex: %s buffer pinning failed : domain_id %d size %zu fd %d error 0x%08x\n", sess->c_name(), - sess->domain_id, this->size, this->fd, (unsigned) err); - throw std::runtime_error("ggml-hex: htp_iface_mmap failed (see log for details)"); - } - } - - this->mapped = true; - this->pinned = pinned; HEX_VERBOSE("ggml-hex: %s mapped buffer: base %p size %zu fd %d pinned %u\n", sess->c_name(), (void *) this->base, this->size, this->fd, pinned); + + this->mapped = true; } void unmap() { if (!this->mapped) return; - htp_iface_munmap(sess->handle, this->fd); + if (!this->pinned) { + // HTP might still hold a reference, tell it drop it + htp_iface_munmap(sess->handle, this->fd); + } + fastrpc_munmap(sess->domain_id, this->fd, (void *) this->base, this->size); HEX_VERBOSE("ggml-hex: %s unmapped buffer: base %p size %zu fd %d\n", sess->c_name(), @@ -244,7 +226,7 @@ struct ggml_hexagon_shared_buffer { this->fd = -1; } - void alloc(size_t size, bool pinned = false) { + void alloc(size_t size) { if (this->base) return; this->base = (uint8_t *) rpcmem_alloc2(RPCMEM_HEAP_ID_SYSTEM, RPCMEM_DEFAULT_FLAGS, size); @@ -262,8 +244,7 @@ struct ggml_hexagon_shared_buffer { HEX_VERBOSE("ggml-hex: %s allocated buffer: base %p size %zu fd %d pinned %d\n", sess->c_name(), (void *) this->base, this->size, this->fd, (int) pinned); - - mmap(pinned); + mmap(); } void free() { @@ -279,15 +260,14 @@ struct ggml_hexagon_shared_buffer { } ggml_hexagon_shared_buffer(ggml_hexagon_session * sess, size_t size, bool pinned = false) { - size += 4 * 1024; // extra page for padding - this->sess = sess; this->size = 0; this->base = nullptr; this->fd = -1; this->mapped = false; + this->pinned = pinned; - alloc(size, pinned); + alloc(size); } ~ggml_hexagon_shared_buffer() { @@ -1492,6 +1472,7 @@ static ggml_backend_buffer_t ggml_backend_hexagon_buffer_type_alloc_buffer( ggml_backend_buffer_type_t buffer_type, size_t size) { auto sess = static_cast(buffer_type->context)->sess; try { + size += 4 * 1024; // guard page ggml_hexagon_shared_buffer * sbuf = new ggml_hexagon_shared_buffer(sess, size); return ggml_backend_buffer_init(buffer_type, ggml_backend_hexagon_buffer_interface, sbuf, size); } catch (const std::exception & exc) { @@ -1504,6 +1485,7 @@ static ggml_backend_buffer_t ggml_backend_hexagon_repack_buffer_type_alloc_buffe ggml_backend_buffer_type_t buffer_type, size_t size) { auto sess = static_cast(buffer_type->context)->sess; try { + size += 4 * 1024; // guard page ggml_hexagon_shared_buffer * sbuf = new ggml_hexagon_shared_buffer(sess, size); return ggml_backend_buffer_init(buffer_type, ggml_backend_hexagon_buffer_interface, sbuf, size); } catch (const std::exception & exc) { @@ -1961,6 +1943,27 @@ void ggml_hexagon_session::flush(bool all) { flush_pending(all); } +static size_t ggml_hexagon_measure_max_vmem(ggml_hexagon_session *sess) { + // This is very expensive but handy for figuring out exactly how much we can mmap on a specific device. + // Allocate a bunch of 128MB pinned buffers till failure. + + std::vector sbufs; + + size_t step = 128u * 1024 * 1024; + size_t vmem = 0; + + try { + while (1) { + sbufs.push_back(new ggml_hexagon_shared_buffer(sess, step, true)); + vmem += step; + } + } catch (...) { } + + for (auto b : sbufs) { delete b; } + + return vmem; +} + void ggml_hexagon_session::allocate(int dev_id) noexcept(false) { this->valid_session = false; this->valid_handle = false; @@ -2050,9 +2053,6 @@ void ggml_hexagon_session::allocate(int dev_id) noexcept(false) { this->valid_handle = true; - GGML_LOG_INFO("ggml-hex: new session: %s : session-id %d domain-id %d uri %s handle 0x%lx\n", this->name.c_str(), - this->session_id, this->domain_id, session_uri, (unsigned long) this->handle); - // Enable FastRPC QoS mode { struct remote_rpc_control_latency l; @@ -2064,6 +2064,9 @@ void ggml_hexagon_session::allocate(int dev_id) noexcept(false) { } } + GGML_LOG_INFO("ggml-hex: new session: %s : session-id %d domain-id %d uri %s handle 0x%lx\n", this->name.c_str(), + this->session_id, this->domain_id, session_uri, (unsigned long) this->handle); + const size_t req_q_size = (sizeof(htp_opbatch_req) * opt_opqueue * 2) + 1024; const size_t rsp_q_size = (sizeof(htp_opbatch_rsp) * opt_opqueue * 2) + 1024; @@ -2111,7 +2114,12 @@ void ggml_hexagon_session::allocate(int dev_id) noexcept(false) { this->op_batch = new ggml_hexagon_opbatch(this, opt_opbatch); this->op_queue = new ggml_hexagon_opqueue(this, opt_opbatch, opt_opqueue); - // Start processing op batch requests + if (!opt_vmem) { + opt_vmem = ggml_hexagon_measure_max_vmem(this); + GGML_LOG_INFO("ggml-hex: measured max vmem %zu\n", opt_vmem); + } + + // Start dspqueue/opbatch processing err = htp_iface_start(this->handle, dev_id, this->queue_id, opt_nhvx, opt_use_hmx, opt_vmem); if (err != 0) { GGML_LOG_ERROR("ggml-hex: failed to start session: 0x%08x\n", (unsigned) err); @@ -2125,17 +2133,17 @@ void ggml_hexagon_session::release() noexcept(true) { int err; - delete this->op_batch; - delete this->op_queue; - - // Stop the DSP-side service and close the queue if (this->valid_iface) { + // Stop dspqueue/opbatch processing err = htp_iface_stop(this->handle); if (err != 0) { GGML_ABORT("ggml-hex: htp_iface_stop failed: 0x%08x\n", (unsigned) err); } } + delete this->op_batch; + delete this->op_queue; + if (opt_etm) { err = htp_iface_etm(this->handle, 0); if (err != 0) { diff --git a/ggml/src/ggml-hexagon/htp/htp-ctx.h b/ggml/src/ggml-hexagon/htp/htp-ctx.h index ccdf790d0f4..e9c563ca887 100644 --- a/ggml/src/ggml-hexagon/htp/htp-ctx.h +++ b/ggml/src/ggml-hexagon/htp/htp-ctx.h @@ -20,7 +20,7 @@ struct htp_mmap { uint64_t size; uint64_t base; uint32_t fd; - uint32_t pinned; + uint32_t reserved; }; // Scratchpad state diff --git a/ggml/src/ggml-hexagon/htp/htp_iface.idl b/ggml/src/ggml-hexagon/htp/htp_iface.idl index f92e379e0ab..d696a5fba0c 100644 --- a/ggml/src/ggml-hexagon/htp/htp_iface.idl +++ b/ggml/src/ggml-hexagon/htp/htp_iface.idl @@ -13,7 +13,7 @@ struct htp_iface_pmu_conf { interface htp_iface : remote_handle64 { AEEResult start(in uint32 sess_id, in uint64 dsp_queue_id, in uint32 n_hvx, in uint32 use_hmx, in uint64 max_vmem); AEEResult stop(); - AEEResult mmap(in uint32 fd, in uint32 size, in uint32 pinned); + AEEResult mmap(in uint32 fd, in uint32 size); AEEResult munmap(in uint32 fd); AEEResult profiler(in uint32 mode, in htp_iface_pmu_conf pmu); AEEResult etm(in uint32 enable); diff --git a/ggml/src/ggml-hexagon/htp/main.c b/ggml/src/ggml-hexagon/htp/main.c index 861ac33a89b..e2c0cd6d7cc 100644 --- a/ggml/src/ggml-hexagon/htp/main.c +++ b/ggml/src/ggml-hexagon/htp/main.c @@ -210,7 +210,7 @@ AEEResult htp_iface_close(remote_handle64 handle) { return AEE_SUCCESS; } -AEEResult htp_iface_mmap(remote_handle64 handle, uint32 fd, uint32 size, uint32 pinned) { +AEEResult htp_iface_mmap(remote_handle64 handle, int fd, uint32_t size) { struct htp_context * ctx = (struct htp_context *) handle; if (!ctx) { return AEE_EBADPARM; @@ -220,7 +220,6 @@ AEEResult htp_iface_mmap(remote_handle64 handle, uint32 fd, uint32 size, uint32 for (uint32_t i=0; immap[i]; if (m->fd == fd) { - m->pinned = pinned; return AEE_SUCCESS; } } @@ -229,7 +228,7 @@ AEEResult htp_iface_mmap(remote_handle64 handle, uint32 fd, uint32 size, uint32 for (uint32_t i=0; immap[i]; if (!m->size) { - FARF(HIGH, "mmap : fd %u size %u pinned %u", fd, size, pinned); + FARF(HIGH, "mmap : fd %u size %u", fd, size); #if __HVX_ARCH__ > 73 void *va = HAP_mmap2(NULL, size, HAP_PROT_READ | HAP_PROT_WRITE, 0, fd, 0); #else @@ -248,7 +247,6 @@ AEEResult htp_iface_mmap(remote_handle64 handle, uint32 fd, uint32 size, uint32 m->base = (uint64_t) va; m->fd = fd; m->size = size; - m->pinned = pinned; return AEE_SUCCESS; } @@ -275,7 +273,6 @@ AEEResult htp_iface_munmap(remote_handle64 handle, uint32 fd) { m->size = 0; m->base = NULL; m->fd = -1; - m->pinned = 0; } } @@ -622,8 +619,8 @@ static inline bool reuse_buf(struct htp_context *ctx, uint32_t *m_reuse, struct } static inline void drop_mmap(struct htp_context *ctx, struct htp_mmap *m) { - if (m->size && !m->pinned) { - FARF(HIGH, "unmap : fd %u base %p size %u pinned %u", m->fd, (void*) m->base, (uint32_t) m->size, m->pinned); + if (m->size) { + FARF(HIGH, "unmap : fd %u base %p size %u", m->fd, (void*) m->base, (uint32_t) m->size); #if __HVX_ARCH__ > 73 HAP_munmap2((void *) m->base, m->size); #else @@ -660,9 +657,8 @@ static inline void mmap_buf(struct htp_context *ctx, struct htp_buf_desc *b) { m->base = b->base = (uint64_t) va; m->fd = b->fd; m->size = b->size; - m->pinned = 0; - FARF(HIGH, "mmap : fd %u base %p size %u pinned %u", m->fd, (void*) m->base, (uint32_t) m->size, m->pinned); + FARF(HIGH, "mmap : fd %u base %p size %u", m->fd, (void*) m->base, (uint32_t) m->size); return; } } diff --git a/scripts/snapdragon/adb/run-cli.sh b/scripts/snapdragon/adb/run-cli.sh index e1f0ac0eb8e..a8fdadefbce 100755 --- a/scripts/snapdragon/adb/run-cli.sh +++ b/scripts/snapdragon/adb/run-cli.sh @@ -54,13 +54,19 @@ opqueue= opflt= [ "$OF" != "" ] && opflt="GGML_HEXAGON_OPFILTER=$OF" +vmem= +[ "$VM" != "" ] && opflt="GGML_HEXAGON_VMEM=$VM" + +mbuf= +[ "$MB" != "" ] && opflt="GGML_HEXAGON_MBUF=$MB" + set -x adb $adbserial $adbhost shell " \ cd $basedir; ulimit -c unlimited; \ LD_LIBRARY_PATH=$basedir/$branch/lib \ ADSP_LIBRARY_PATH=$basedir/$branch/lib \ - $verbose $sched $opmask $profile $nhvx $hmx $ndev $hb $opbatch $opqueue $opflt \ + $verbose $sched $opmask $profile $nhvx $hmx $ndev $hb $opbatch $opqueue $opflt $vmem $mbuf \ ./$branch/bin/llama-cli --no-mmap -m $basedir/../gguf/$model \ --poll 1000 -t 6 --cpu-mask 0xfc --cpu-strict 1 \ --ctx-size 8192 --ubatch-size 256 -fa on \ diff --git a/scripts/snapdragon/adb/run-completion.sh b/scripts/snapdragon/adb/run-completion.sh index 7b84106dc83..5dc8d921fbe 100755 --- a/scripts/snapdragon/adb/run-completion.sh +++ b/scripts/snapdragon/adb/run-completion.sh @@ -54,13 +54,19 @@ opqueue= opflt= [ "$OF" != "" ] && opflt="GGML_HEXAGON_OPFILTER=$OF" +vmem= +[ "$VM" != "" ] && opflt="GGML_HEXAGON_VMEM=$VM" + +mbuf= +[ "$MB" != "" ] && opflt="GGML_HEXAGON_MBUF=$MB" + set -x adb $adbserial $adbhost shell " \ cd $basedir; ulimit -c unlimited; \ LD_LIBRARY_PATH=$basedir/$branch/lib \ ADSP_LIBRARY_PATH=$basedir/$branch/lib \ - $verbose $sched $opmask $profile $nhvx $hmx $ndev $hb $opbatch $opqueue $opflt \ + $verbose $sched $opmask $profile $nhvx $hmx $ndev $hb $opbatch $opqueue $opflt $vmem $mbuf \ ./$branch/bin/llama-completion --no-mmap -m $basedir/../gguf/$model \ --poll 1000 -t 6 --cpu-mask 0xfc --cpu-strict 1 \ --ctx-size 8192 --ubatch-size 256 -fa on \ From 835be0d314c081072842476b20f944bd98c663d3 Mon Sep 17 00:00:00 2001 From: Max Krasnyansky Date: Tue, 14 Apr 2026 21:22:55 -0700 Subject: [PATCH 3/9] hexagon: update vmem checks to use uint64 --- ggml/src/ggml-hexagon/htp/main.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/ggml/src/ggml-hexagon/htp/main.c b/ggml/src/ggml-hexagon/htp/main.c index e2c0cd6d7cc..da106083a52 100644 --- a/ggml/src/ggml-hexagon/htp/main.c +++ b/ggml/src/ggml-hexagon/htp/main.c @@ -668,8 +668,8 @@ static void prep_op_bufs(struct htp_context *ctx, struct htp_buf_desc *bufs, uin uint32_t m_reuse = 0; // mmap reuse mask (index from ctx->mmap array) uint32_t b_reuse = 0; // buf reuse count - size_t m_vmem = 0; // mapped vmem - size_t e_vmem = 0; // extra vmem + uint64_t m_vmem = 0; // mapped vmem + uint64_t e_vmem = 0; // extra vmem // See what we can reuse for (uint32_t i=0; i < n_bufs; i++) { @@ -683,7 +683,8 @@ static void prep_op_bufs(struct htp_context *ctx, struct htp_buf_desc *bufs, uin // See how much vmem we have mmaped right now for (uint32_t i=0; immap[i].size; } - FARF(HIGH, "prep-bufs : pass1 mmap-vmem %zu extra-vmem %zu n-bufs %u b-reuse %u", m_vmem, e_vmem, n_bufs, b_reuse); + FARF(HIGH, "prep-bufs : pass1 mmap-vmem %zu extra-vmem %zu max-vmem %zu : n-bufs %u b-reuse %u", + (size_t) m_vmem, (size_t) e_vmem, (size_t) ctx->max_vmem, n_bufs, b_reuse); if ((m_vmem + e_vmem) > ctx->max_vmem) { // Drop unused mappings From 44e39d3343ea2291b843741f28cbb1f4e4afa2f3 Mon Sep 17 00:00:00 2001 From: Max Krasnyansky Date: Tue, 14 Apr 2026 21:47:46 -0700 Subject: [PATCH 4/9] hexagon: bump op buffers to 16 (matches max mmaps) --- ggml/src/ggml-hexagon/htp/htp-ops.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ggml/src/ggml-hexagon/htp/htp-ops.h b/ggml/src/ggml-hexagon/htp/htp-ops.h index 9bf1b20b276..107395ce8ea 100644 --- a/ggml/src/ggml-hexagon/htp/htp-ops.h +++ b/ggml/src/ggml-hexagon/htp/htp-ops.h @@ -90,7 +90,7 @@ enum htp_op_code { #define HTP_OP_MAX_INPUTS 6 // aka GGML_MAX_SRCS #define HTP_OP_MAX_PARAMS 16 // aka GGML_MAX_OP_PARAMS -#define HTP_OP_MAX_BUFS 8 +#define HTP_OP_MAX_BUFS 16 #define HTP_OP_MAX_REQS 256 #define HTP_OP_MAX_TENSORS (HTP_OP_MAX_REQS * HTP_OP_MAX_INPUTS + HTP_OP_MAX_REQS) From b97056351f299a8abb4e771ae31741841e82658e Mon Sep 17 00:00:00 2001 From: Max Krasnyansky Date: Tue, 14 Apr 2026 22:10:18 -0700 Subject: [PATCH 5/9] hexagon: bump default vmem to 3.2GB --- ggml/src/ggml-hexagon/htp/htp-ops.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/ggml/src/ggml-hexagon/htp/htp-ops.h b/ggml/src/ggml-hexagon/htp/htp-ops.h index 107395ce8ea..66a3150c1a0 100644 --- a/ggml/src/ggml-hexagon/htp/htp-ops.h +++ b/ggml/src/ggml-hexagon/htp/htp-ops.h @@ -94,11 +94,7 @@ enum htp_op_code { #define HTP_OP_MAX_REQS 256 #define HTP_OP_MAX_TENSORS (HTP_OP_MAX_REQS * HTP_OP_MAX_INPUTS + HTP_OP_MAX_REQS) -#if __HVX_ARCH__ < 75 -#define HTP_OP_MAX_VMEM_DEFAULT (3167538380u) -#else #define HTP_OP_MAX_VMEM_DEFAULT (3355443200u) -#endif #define HTP_MMAP_MAX_VMEM (2147483648u) From 1c29ed9780cf0c527200a582c9194754c98e5d90 Mon Sep 17 00:00:00 2001 From: Max Krasnyansky Date: Wed, 15 Apr 2026 10:41:24 -0700 Subject: [PATCH 6/9] hexagon: add support for autodetecting vmem space and some logging cleanup in that area --- ggml/src/ggml-hexagon/ggml-hexagon.cpp | 151 ++++++++++++++----------- ggml/src/ggml-hexagon/htp/main.c | 2 +- 2 files changed, 86 insertions(+), 67 deletions(-) diff --git a/ggml/src/ggml-hexagon/ggml-hexagon.cpp b/ggml/src/ggml-hexagon/ggml-hexagon.cpp index f60cc3a3603..f0023d3b450 100644 --- a/ggml/src/ggml-hexagon/ggml-hexagon.cpp +++ b/ggml/src/ggml-hexagon/ggml-hexagon.cpp @@ -48,15 +48,16 @@ using intvec = std::vector; using uintvec = std::vector; using u32vec = std::vector; +static int opt_arch = 0; // autodetect static size_t opt_ndev = 1; static size_t opt_nhvx = 0; // use all -static int opt_arch = 0; // autodetect +static int opt_use_hmx = 1; // when set, enable HMX; when 0, use HVX only +static size_t opt_vmem = HTP_OP_MAX_VMEM_DEFAULT; // max available va space for buffer mappings +static size_t opt_mbuf = 1ul * 1024 * 1024 * 1024; // max buffer size static int opt_etm = 0; static int opt_verbose = 0; static int opt_profile = 0; // profiling mode (0-disabled, 1-basic, 2-pmu) static int opt_hostbuf = 1; // hostbuf ON by default -static size_t opt_vmem = HTP_OP_MAX_VMEM_DEFAULT; // max available va space for buffer mappings -static int opt_use_hmx = 1; // when set, enable HMX; when 0, use HVX only // Default PMU events, if profiling with PMU (mode=2) is enabled // See https://docs.qualcomm.com/doc/80-N2040-60/topic/pmu-events.html @@ -67,7 +68,7 @@ static u32vec opt_pmu_evt { 0x3, 0x111, 0x100, 0x105, 0x240, 0x256, 0x7D, 0x8C } static int opt_opstage = HTP_OPSTAGE_QUEUE | HTP_OPSTAGE_COMPUTE; static int opt_opbatch = 1024; // max number of ops in a batch static int opt_opqueue = 16; // max number of pending batches - // + static std::regex* opt_opfilter = NULL; // regex of ops to not claim #define HEX_VERBOSE(...) \ @@ -112,7 +113,7 @@ static void ggml_hexagon_dump_op_supp(const std::string &sess_name, const struct if (!opt_verbose) return; op_desc desc(op); - GGML_LOG_DEBUG("ggml-hex: %s supports-op %s : %s : %s : %s : %s : %s : %s\n", sess_name.c_str(), + GGML_LOG_DEBUG("ggml-hex: %s supports-op %s: %s : %s : %s : %s : %s : %s\n", sess_name.c_str(), ggml_op_desc(op), desc.names, desc.dims, desc.types, desc.strides, desc.buffs, supp ? "yes" : "no"); } @@ -120,8 +121,6 @@ static void ggml_hexagon_dump_op_prof(const std::string &sess_name, const ggml_t uint32_t op_usec, uint32_t op_cycles, const uint32_t pmu[]) { if (!opt_profile) return; - op_desc desc(op); - char pmu_str[256] = ""; if (opt_profile > 1) { static_assert(HTP_PROF_PMU_NCNT == 8, "current implementation assumes 8 PMU counters"); @@ -129,6 +128,7 @@ static void ggml_hexagon_dump_op_prof(const std::string &sess_name, const ggml_t pmu[0], pmu[1], pmu[2], pmu[3], pmu[4], pmu[5], pmu[6], pmu[7]); } + op_desc desc(op); GGML_LOG_DEBUG("ggml-hex: %s profile-op %s: %s : %s : %s : %s : usec %u cycles %u%s\n", sess_name.c_str(), ggml_op_desc(op), desc.names, desc.dims, desc.types, desc.strides, op_usec, op_cycles, pmu_str); } @@ -1504,7 +1504,7 @@ static size_t ggml_backend_hexagon_buffer_type_get_alloc_size(ggml_backend_buffe } static size_t ggml_backend_hexagon_buffer_type_get_max_size(ggml_backend_buffer_type_t buffer_type) { - return 1UL * 1024 * 1024 * 1024; // 1GB per buffer + return opt_mbuf; // typically 1GB per buffer GGML_UNUSED(buffer_type); } @@ -1572,14 +1572,14 @@ struct ggml_hexagon_opbatch { d_map.clear(); } - ggml_hexagon_opbatch(ggml_hexagon_session *sess, size_t batch_size) { + ggml_hexagon_opbatch(ggml_hexagon_session *sess, size_t batch_size, size_t max_vmem) { this->sess = sess; n_bufs_max = HTP_OP_MAX_BUFS; n_ops_max = batch_size; n_tens_max = n_ops_max + n_ops_max * HTP_OP_MAX_INPUTS; - b_vmem_max = opt_vmem; + b_vmem_max = max_vmem; ops.resize(n_ops_max); @@ -1591,6 +1591,9 @@ struct ggml_hexagon_opbatch { t_map.reserve(n_tens_max); d_map.reserve(n_tens_max); + GGML_LOG_INFO("ggml-hex: %s op batching: n-bufs %u n-tensors %u n-ops %u vmem %zu\n", + sess->c_name(), n_bufs_max, n_tens_max, n_ops_max, b_vmem_max); + reset(); } @@ -1924,6 +1927,8 @@ void ggml_hexagon_session::flush_batch() { // Bump pending flag (cleared in the session::flush once we get the response) this->op_pending++; // atomic inc + HEX_VERBOSE("ggml-hex: %s queue-opbatch: %p size %u\n", this->c_name(), dbuf.ptr, dbuf.size); + int err = dspqueue_write(this->queue, 0, 1, &dbuf, sizeof(req), (const uint8_t*) &req, DSPQUEUE_TIMEOUT); if (err != 0) { GGML_ABORT("ggml-hex: %s dspqueue_write failed: 0x%08x\n", this->c_name(), (unsigned) err); @@ -1944,15 +1949,23 @@ void ggml_hexagon_session::flush(bool all) { } static size_t ggml_hexagon_measure_max_vmem(ggml_hexagon_session *sess) { - // This is very expensive but handy for figuring out exactly how much we can mmap on a specific device. - // Allocate a bunch of 128MB pinned buffers till failure. + // Allocate a bunch pinned buffers till failure. + // This is kind of expensive but handy for figuring out exactly how much we can mmap on a specific device. + // Typically we're going to allocate all/most of these buffers anyway for the model weights. std::vector sbufs; - size_t step = 128u * 1024 * 1024; + const size_t MiB = 1024 * 1024; + const size_t GiB = MiB * 1024; + size_t vmem = 0; + size_t step = 256u * MiB; try { + sbufs.push_back(new ggml_hexagon_shared_buffer(sess, GiB, true)); vmem += GiB; + sbufs.push_back(new ggml_hexagon_shared_buffer(sess, GiB, true)); vmem += GiB; + sbufs.push_back(new ggml_hexagon_shared_buffer(sess, GiB, true)); vmem += GiB; + while (1) { sbufs.push_back(new ggml_hexagon_shared_buffer(sess, step, true)); vmem += step; @@ -1961,7 +1974,7 @@ static size_t ggml_hexagon_measure_max_vmem(ggml_hexagon_session *sess) { for (auto b : sbufs) { delete b; } - return vmem; + return vmem - step; // backoff to account for overhead from internal mappings } void ggml_hexagon_session::allocate(int dev_id) noexcept(false) { @@ -1977,7 +1990,7 @@ void ggml_hexagon_session::allocate(int dev_id) noexcept(false) { this->op_pending = 0; - GGML_LOG_INFO("ggml-hex: allocating new session: %s\n", this->name.c_str()); + GGML_LOG_DEBUG("ggml-hex: %s allocating new session\n", this->name.c_str()); domain * my_domain = get_domain(this->domain_id); if (my_domain == NULL) { @@ -2064,7 +2077,7 @@ void ggml_hexagon_session::allocate(int dev_id) noexcept(false) { } } - GGML_LOG_INFO("ggml-hex: new session: %s : session-id %d domain-id %d uri %s handle 0x%lx\n", this->name.c_str(), + GGML_LOG_INFO("ggml-hex: %s new session : session-id %d domain-id %d uri %s handle 0x%lx\n", this->c_name(), this->session_id, this->domain_id, session_uri, (unsigned long) this->handle); const size_t req_q_size = (sizeof(htp_opbatch_req) * opt_opqueue * 2) + 1024; @@ -2111,18 +2124,19 @@ void ggml_hexagon_session::allocate(int dev_id) noexcept(false) { } // Allocate buffers and state for op batching - this->op_batch = new ggml_hexagon_opbatch(this, opt_opbatch); this->op_queue = new ggml_hexagon_opqueue(this, opt_opbatch, opt_opqueue); if (!opt_vmem) { opt_vmem = ggml_hexagon_measure_max_vmem(this); - GGML_LOG_INFO("ggml-hex: measured max vmem %zu\n", opt_vmem); + GGML_LOG_INFO("ggml-hex: %s measured max vmem %zu\n", this->c_name(), opt_vmem); } + this->op_batch = new ggml_hexagon_opbatch(this, opt_opbatch, opt_vmem); + // Start dspqueue/opbatch processing err = htp_iface_start(this->handle, dev_id, this->queue_id, opt_nhvx, opt_use_hmx, opt_vmem); if (err != 0) { - GGML_LOG_ERROR("ggml-hex: failed to start session: 0x%08x\n", (unsigned) err); + GGML_LOG_ERROR("ggml-hex: %s failed to start session: 0x%08x\n", this->c_name(), (unsigned) err); throw std::runtime_error("ggml-hex: iface start failed (see log for details)"); } this->valid_iface = true; @@ -3405,21 +3419,6 @@ struct ggml_hexagon_registry { ggml_hexagon_registry::ggml_hexagon_registry(ggml_backend_reg_t reg) { GGML_LOG_INFO("ggml-hex: Hexagon backend (experimental) : allocating new registry : ndev %zu\n", opt_ndev); - if (!opt_arch) { - int err = get_hex_arch_ver(CDSP_DOMAIN_ID, &opt_arch); - if (err != 0) { - GGML_LOG_ERROR("ggml-hex: failed to query HTP version (err %d) defaulting to v73\n", err); - opt_arch = 73; - } - } - -#if defined(__ANDROID__) - if (opt_arch < 75) { - opt_ndev = 1; - GGML_LOG_WARN("ggml-hex: forcing ndev to 1 for SoCs archs lower than v75.\n"); - } -#endif - GGML_LOG_INFO("ggml-hex: Hexagon Arch version v%d\n", opt_arch); // Create devices / sessions @@ -3505,47 +3504,67 @@ static void ggml_hexagon_init(ggml_backend_reg * reg) { static_assert((unsigned int) HTP_TYPE_IQ4_NL == (unsigned int) GGML_TYPE_IQ4_NL, "please update hexagon_type to match ggml_type"); - const char * str_verbose = getenv("GGML_HEXAGON_VERBOSE"); - const char * str_hostbuf = getenv("GGML_HEXAGON_HOSTBUF"); - const char * str_opstage = getenv("GGML_HEXAGON_OPSTAGE"); - const char * str_opbatch = getenv("GGML_HEXAGON_OPBATCH"); - const char * str_opqueue = getenv("GGML_HEXAGON_OPQUEUE"); - const char * str_opfilter= getenv("GGML_HEXAGON_OPFILTER"); - const char * str_profile = getenv("GGML_HEXAGON_PROFILE"); - const char * str_etm = getenv("GGML_HEXAGON_ETM"); - const char * str_nhvx = getenv("GGML_HEXAGON_NHVX"); - const char * str_use_hmx = getenv("GGML_HEXAGON_USE_HMX"); - const char * str_ndev = getenv("GGML_HEXAGON_NDEV"); - const char * str_arch = getenv("GGML_HEXAGON_ARCH"); - const char * str_vmem = getenv("GGML_HEXAGON_VMEM"); + const char * str_verbose = getenv("GGML_HEXAGON_VERBOSE"); + const char * str_hostbuf = getenv("GGML_HEXAGON_HOSTBUF"); + const char * str_opstage = getenv("GGML_HEXAGON_OPSTAGE"); + const char * str_opbatch = getenv("GGML_HEXAGON_OPBATCH"); + const char * str_opqueue = getenv("GGML_HEXAGON_OPQUEUE"); + const char * str_opfilter = getenv("GGML_HEXAGON_OPFILTER"); + const char * str_profile = getenv("GGML_HEXAGON_PROFILE"); + const char * str_etm = getenv("GGML_HEXAGON_ETM"); + const char * str_nhvx = getenv("GGML_HEXAGON_NHVX"); + const char * str_use_hmx = getenv("GGML_HEXAGON_USE_HMX"); + const char * str_ndev = getenv("GGML_HEXAGON_NDEV"); + const char * str_arch = getenv("GGML_HEXAGON_ARCH"); + const char * str_vmem = getenv("GGML_HEXAGON_VMEM"); + const char * str_mbuf = getenv("GGML_HEXAGON_MBUF"); + + // Init Arch first since it affects other defaults + if (!str_arch) { + int err = get_hex_arch_ver(CDSP_DOMAIN_ID, &opt_arch); + if (err != 0) { + GGML_LOG_ERROR("ggml-hex: failed to query HTP version (err %d) defaulting to v73\n", err); + opt_arch = 73; + } + } else { + if (str_arch[0] == 'v' || str_arch[0] == 'V') { + str_arch++; + } + opt_arch = strtoul(str_arch, NULL, 0); + } + + size_t MiB = 1024 * 1024; + + // Update vmem default + opt_vmem = opt_arch >= 75 ? HTP_OP_MAX_VMEM_DEFAULT : 3000 * MiB; auto RE_ICASE = std::regex_constants::icase; - opt_opfilter = str_opfilter ? new std::regex(str_opfilter, RE_ICASE) : NULL; - opt_verbose = str_verbose ? atoi(str_verbose) : 0; - opt_hostbuf = str_hostbuf ? atoi(str_hostbuf) : opt_hostbuf; - opt_opmask = str_opmask ? strtoul(str_opmask, NULL, 0) : opt_opmask; - opt_opsync = str_opsync ? atoi(str_opsync) : opt_opsync; - opt_opbatch = str_opbatch ? strtoul(str_opbatch, NULL, 0) : opt_opbatch; - opt_opqueue = str_opqueue ? strtoul(str_opqueue, NULL, 0) : opt_opqueue; - opt_profile = str_profile ? atoi(str_profile) : 0; - opt_etm = str_etm ? atoi(str_etm) : 0; - opt_nhvx = str_nhvx ? strtoul(str_nhvx, NULL, 0) : opt_nhvx; - opt_use_hmx = str_use_hmx ? atoi(str_use_hmx) : opt_use_hmx; - opt_ndev = str_ndev ? strtoul(str_ndev, NULL, 0) : opt_ndev; - opt_hostbuf = str_hostbuf ? atoi(str_hostbuf) : opt_hostbuf; - opt_vmem = str_vmem ? strtoul(str_vmem, NULL, 0) : opt_vmem; + opt_opfilter = str_opfilter ? new std::regex(str_opfilter, RE_ICASE) : NULL; + opt_verbose = str_verbose ? atoi(str_verbose) : 0; + opt_hostbuf = str_hostbuf ? atoi(str_hostbuf) : opt_hostbuf; + opt_opstage = str_opstage ? strtoul(str_opstage, NULL, 0) : opt_opstage; + opt_opbatch = str_opbatch ? strtoul(str_opbatch, NULL, 0) : opt_opbatch; + opt_opqueue = str_opqueue ? strtoul(str_opqueue, NULL, 0) : opt_opqueue; + opt_profile = str_profile ? atoi(str_profile) : 0; + opt_etm = str_etm ? atoi(str_etm) : 0; + opt_nhvx = str_nhvx ? strtoul(str_nhvx, NULL, 0) : opt_nhvx; + opt_use_hmx = str_use_hmx ? atoi(str_use_hmx) : opt_use_hmx; + opt_ndev = str_ndev ? strtoul(str_ndev, NULL, 0) : opt_ndev; + opt_hostbuf = str_hostbuf ? atoi(str_hostbuf) : opt_hostbuf; + opt_mbuf = str_mbuf ? strtoul(str_mbuf, NULL, 0) * MiB : opt_mbuf; + opt_vmem = str_vmem ? strtoul(str_vmem, NULL, 0) * MiB : opt_vmem; if (opt_ndev > GGML_HEXAGON_MAX_SESSIONS) { opt_ndev = GGML_HEXAGON_MAX_SESSIONS; } - if (str_arch) { - if (str_arch[0] == 'v') { - str_arch++; - } - opt_arch = strtoul(str_arch, NULL, 0); +#if defined(__ANDROID__) + if (opt_arch < 75) { + opt_ndev = 1; + GGML_LOG_WARN("ggml-hex: forcing ndev to 1 for SoCs archs lower than v75.\n"); } +#endif if (str_profile) { opt_pmu_evt = [&]() -> std::vector { diff --git a/ggml/src/ggml-hexagon/htp/main.c b/ggml/src/ggml-hexagon/htp/main.c index da106083a52..49c1a15b344 100644 --- a/ggml/src/ggml-hexagon/htp/main.c +++ b/ggml/src/ggml-hexagon/htp/main.c @@ -210,7 +210,7 @@ AEEResult htp_iface_close(remote_handle64 handle) { return AEE_SUCCESS; } -AEEResult htp_iface_mmap(remote_handle64 handle, int fd, uint32_t size) { +AEEResult htp_iface_mmap(remote_handle64 handle, uint32_t fd, uint32_t size) { struct htp_context * ctx = (struct htp_context *) handle; if (!ctx) { return AEE_EBADPARM; From b81c1a9e22cf922819600dae479d1d9cc04e8f01 Mon Sep 17 00:00:00 2001 From: Max Krasnyansky Date: Tue, 28 Apr 2026 14:23:19 -0700 Subject: [PATCH 7/9] hexagon: fix whitespace warnings --- ggml/src/ggml-hexagon/ggml-hexagon.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ggml/src/ggml-hexagon/ggml-hexagon.cpp b/ggml/src/ggml-hexagon/ggml-hexagon.cpp index f0023d3b450..9345da62168 100644 --- a/ggml/src/ggml-hexagon/ggml-hexagon.cpp +++ b/ggml/src/ggml-hexagon/ggml-hexagon.cpp @@ -3578,7 +3578,7 @@ static void ggml_hexagon_init(ggml_backend_reg * reg) { GGML_LOG_INFO("ggml-hex: Profiling mode %u : pmu-evt [ %s ]\n", opt_profile, vec_to_str(opt_pmu_evt).c_str()); } - + reg->context = new ggml_hexagon_registry(reg); } From 5619942f10961cc174c1f0cd50233b43c1762f11 Mon Sep 17 00:00:00 2001 From: Max Krasnyansky Date: Wed, 29 Apr 2026 10:17:05 -0700 Subject: [PATCH 8/9] Update scripts/snapdragon/adb/run-cli.sh Co-authored-by: Pascal --- scripts/snapdragon/adb/run-cli.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/scripts/snapdragon/adb/run-cli.sh b/scripts/snapdragon/adb/run-cli.sh index a8fdadefbce..3f89a7777c6 100755 --- a/scripts/snapdragon/adb/run-cli.sh +++ b/scripts/snapdragon/adb/run-cli.sh @@ -59,7 +59,11 @@ vmem= mbuf= [ "$MB" != "" ] && opflt="GGML_HEXAGON_MBUF=$MB" +vmem= +[ "$VM" != "" ] && vmem="GGML_HEXAGON_VMEM=$VM" +mbuf= +[ "$MB" != "" ] && mbuf="GGML_HEXAGON_MBUF=$MB" set -x adb $adbserial $adbhost shell " \ From 92c327042f1edcb61f0f6db9fe9b6de93c92995c Mon Sep 17 00:00:00 2001 From: Max Krasnyansky Date: Wed, 29 Apr 2026 10:19:37 -0700 Subject: [PATCH 9/9] hex-adb: fix run-completion script --- scripts/snapdragon/adb/run-completion.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/snapdragon/adb/run-completion.sh b/scripts/snapdragon/adb/run-completion.sh index 5dc8d921fbe..638823b8824 100755 --- a/scripts/snapdragon/adb/run-completion.sh +++ b/scripts/snapdragon/adb/run-completion.sh @@ -55,10 +55,10 @@ opflt= [ "$OF" != "" ] && opflt="GGML_HEXAGON_OPFILTER=$OF" vmem= -[ "$VM" != "" ] && opflt="GGML_HEXAGON_VMEM=$VM" +[ "$VM" != "" ] && vmem="GGML_HEXAGON_VMEM=$VM" mbuf= -[ "$MB" != "" ] && opflt="GGML_HEXAGON_MBUF=$MB" +[ "$MB" != "" ] && mbuf="GGML_HEXAGON_MBUF=$MB" set -x