Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions examples/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,15 @@ add_subdirectory(minicpm4)
add_subdirectory(qwen3)
add_subdirectory(qwen3_service)
add_subdirectory(deepseek_ocr)

if(MLLM_BUILD_QNN_BACKEND)
add_subdirectory(qwen_npu)
endif()

if(MLLM_TRACY_ENABLE)
add_subdirectory(tracy_example)
endif()

if(MLLM_QUALCOMM_QNN_AOT_ON_X86_ENABLE)
add_subdirectory(qwen3_qnn_aot)
endif()
Comment on lines +20 to +22
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
# Verify that qwen3_qnn_aot directory exists and contains CMakeLists.txt

if [ -d "examples/qwen3_qnn_aot" ]; then
  echo "✓ Directory examples/qwen3_qnn_aot exists"
  if [ -f "examples/qwen3_qnn_aot/CMakeLists.txt" ]; then
    echo "✓ CMakeLists.txt found in examples/qwen3_qnn_aot"
    wc -l examples/qwen3_qnn_aot/CMakeLists.txt
  else
    echo "✗ CMakeLists.txt NOT found in examples/qwen3_qnn_aot"
    exit 1
  fi
else
  echo "✗ Directory examples/qwen3_qnn_aot does NOT exist"
  exit 1
fi

Repository: UbiquitousLearning/mllm

Length of output: 197


The qwen3_qnn_aot/CMakeLists.txt file is empty. This will cause build failures or undefined behavior when add_subdirectory() is called. The file must be populated with the appropriate CMake configuration before this conditional block can be enabled.

Empty file.
8 changes: 6 additions & 2 deletions mllm/backends/cpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,10 @@ if(MLLM_BUILD_ARM_BACKEND)
PATTERN "*.h"
PATTERN "*.hpp")
else()
# X86 highway
# TODO
install(
TARGETS hwy
EXPORT MllmTargets
LIBRARY DESTINATION lib
ARCHIVE DESTINATION lib
RUNTIME DESTINATION bin)
endif()
67 changes: 67 additions & 0 deletions mllm/backends/qnn/aot/QnnTargetMachine.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
// Copyright (c) MLLM Team.
// Licensed under the MIT License.

#pragma once

#include <cstdint>

namespace mllm::qnn::aot {

// Hexagon Tensor Processor (HTP) architecture generation of the target SoC.
// Numeric values mirror the HTP architecture version number (e.g. V75 == 75).
enum class QcomHTPArch : uint32_t {
  NONE = 0,  // Architecture unknown / not specified.
  V68 = 68,
  V69 = 69,
  V73 = 73,
  V75 = 75,
  V79 = 79,
  V81 = 81,
};

// Qualcomm SoC (chipset) identifiers for supported target devices.
// NOTE(review): the numeric values look like QNN soc-model IDs (they are fed
// into QnnHtpDevice on-chip device info via static_cast<uint32_t>) — confirm
// against the QNN SDK's soc model table before adding new entries.
enum QcomChipset : uint32_t {
  UNKNOWN_SM = 0,  // Chipset not known / not specified.
  SA8295 = 39,
  SM8350 = 35,
  SM8450 = 36,
  SM8475 = 42,
  SM8550 = 43,
  SM8650 = 57,
  SM8750 = 69,
  SM8850 = 87,
  SSG2115P = 46,
  SSG2125P = 58,
  SXR1230P = 45,
  SXR2230P = 53,
  SXR2330P = 75,
  QCS9100 = 77,
  SAR2230P = 95,
  SA8255 = 52,
  SW6100 = 96,
};

// HTP performance profile requested for the target machine.
// NOTE(review): presumably these map onto the HTP perf-infrastructure power
// modes — confirm against the QNN HTP performance API before relying on the
// exact semantics of each level.
enum QcomTryBestPerformance : uint32_t {
  kHtpDefault = 0,  // Backend default behavior.
  kHtpSustainedHighPerformance,
  kHtpBurst,
  kHtpHighPerformance,
  kHtpPowerSaver,
  kHtpLowPowerSaver,
  kHtpHighPowerSaver,
  kHtpLowBalanced,
  kHtpBalanced,
};

// Protection Domain (PD) Session type used when opening the HTP device.
enum QcomSecurityPDSession : uint32_t {
  kHtpUnsignedPd = 0,  // Run in an unsigned process domain (default).
  kHtpSignedPd,        // Request a signed process domain.
};

// Aggregated description of the Qualcomm target machine used to configure the
// QNN HTP device (chipset, HTP arch, performance profile, PD session, VTCM).
struct QcomTargetMachine {
  QcomChipset soc_htp_chipset;                        // Target SoC model.
  QcomHTPArch soc_htp_arch;                           // HTP architecture generation.
  QcomTryBestPerformance soc_htp_performance;         // Requested performance profile.
  QcomSecurityPDSession soc_htp_security_pd_session;  // Signed vs. unsigned PD session.
  uint32_t soc_htp_vtcm_total_memory_size;            // Total VTCM size, in MB.
};
Comment thread
chenghuaWang marked this conversation as resolved.

} // namespace mllm::qnn::aot
134 changes: 129 additions & 5 deletions mllm/backends/qnn/aot/QnnWrappersAPI.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
// Copyright (c) MLLM Team.
// Licensed under the MIT License.
#include <QNN/HTP/QnnHtpDevice.h>
#include <QNN/HTP/QnnHtpCommon.h>
#include <QNN/HTP/QnnHtpContext.h>

#include "QnnContext.h"
#include "mllm/utils/Common.hpp"
#include "mllm/backends/qnn/aot/QnnWrappersAPI.hpp"
#include "mllm/backends/qnn/aot/QnnTargetMachine.hpp"

namespace mllm::qnn::aot {

Expand Down Expand Up @@ -67,9 +74,11 @@ bool QnnDynSymbolLoader::loadQnnDynLibAtPath(const std::string& path, const std:
return false;
}

QnnAOTEnv::QnnAOTEnv() { _setup(); }
QnnAOTEnv::QnnAOTEnv(QcomTargetMachine& target_machine) : target_machine_(target_machine) { _setup(); }

QnnAOTEnv::QnnAOTEnv(const std::string& lib_path) { _setup(lib_path); }
QnnAOTEnv::QnnAOTEnv(const std::string& lib_path, QcomTargetMachine& target_machine) : target_machine_(target_machine) {
_setup(lib_path);
}

void QnnAOTEnv::_setup(const std::string& path) {
auto& loader = QnnDynSymbolLoader::instance();
Expand Down Expand Up @@ -121,9 +130,50 @@ void QnnAOTEnv::_setup(const std::string& path) {

MLLM_RT_ASSERT(status != QNN_PROPERTY_ERROR_UNKNOWN_KEY);
}

// Try to config this target machine
{
auto device_custom_config = createDecideCustomConfigInfo();
QnnHtpDevice_CustomConfig_t* p_custom_config = nullptr;

switch (target_machine_.soc_htp_security_pd_session) {
case QcomSecurityPDSession::kHtpSignedPd: {
p_custom_config = (QnnHtpDevice_CustomConfig_t*)malloc(sizeof(QnnHtpDevice_CustomConfig_t));
unreachable_handle_.push_back(p_custom_config);
p_custom_config->option = QNN_HTP_DEVICE_CONFIG_OPTION_SIGNEDPD;
p_custom_config->useSignedProcessDomain.useSignedProcessDomain = true;
p_custom_config->useSignedProcessDomain.deviceId = 0;
device_custom_config.push_back(static_cast<QnnDevice_CustomConfig_t>(p_custom_config));
break;
}
case QcomSecurityPDSession::kHtpUnsignedPd:
default: break;
}

const std::vector<QnnDevice_PlatformInfo_t*> device_platform_info = createDevicePlatformInfo();
uint32_t num_custom_configs = device_platform_info.size() + device_custom_config.size();
target_machine_qnn_config_.resize(num_custom_configs);

for (std::size_t i = 0; i < device_custom_config.size(); ++i) {
target_machine_qnn_config_[i].option = QNN_DEVICE_CONFIG_OPTION_CUSTOM;
target_machine_qnn_config_[i].customConfig = device_custom_config[i];
target_machine_qnn_config_ptrs_.push_back(&target_machine_qnn_config_[i]);
}

if (!device_platform_info.empty()) {
// The length of platform info can only be 1.
MLLM_RT_ASSERT_EQ(device_platform_info.size(), 1u);
target_machine_qnn_config_[device_custom_config.size()].option = QNN_DEVICE_CONFIG_OPTION_PLATFORM_INFO;
target_machine_qnn_config_[device_custom_config.size()].hardwareInfo = device_platform_info.back();
target_machine_qnn_config_ptrs_.push_back(&target_machine_qnn_config_[device_custom_config.size()]);
}

// null terminated
target_machine_qnn_config_ptrs_.push_back(nullptr);
}
Comment on lines +134 to +173
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

Add null checks after malloc calls and improve error handling.

Lines 141-146 and throughout this section allocate memory with malloc but don't check if allocation succeeded. If malloc returns nullptr, dereferencing it causes undefined behavior.

Additionally, the code assumes createDevicePlatformInfo() returns exactly 1 element (line 165), but there's no error handling if it returns an unexpected result before the assertion.

Add null checks after all malloc calls:

 p_custom_config = (QnnHtpDevice_CustomConfig_t*)malloc(sizeof(QnnHtpDevice_CustomConfig_t));
+if (p_custom_config == nullptr) {
+  MLLM_ERROR_EXIT(ExitCode::kCoreError, "Failed to allocate QnnHtpDevice_CustomConfig_t");
+}
 unreachable_handle_.push_back(p_custom_config);

Apply similar checks to all other malloc calls in this method and in the helper methods (createDevicePlatformInfo, createDecideCustomConfigInfo, createContextCustomConfig).

🤖 Prompt for AI Agents
In mllm/backends/qnn/aot/QnnWrappersAPI.cpp around lines 134-173, add robust
null-checks and error paths for all dynamic allocations and validate helper
outputs: after every malloc (e.g. the QnnHtpDevice_CustomConfig_t allocation)
check for nullptr and on failure log an error, clean up any previously allocated
resources in unreachable_handle_ and target_machine_qnn_config_ptrs_, and
return/propagate an error instead of dereferencing; likewise check the return
vectors from createDecideCustomConfigInfo(), createDevicePlatformInfo() and
other helpers before using them (do not rely solely on the assert) — if
createDevicePlatformInfo() returns an unexpected size, log/error and perform
cleanup and early exit; apply the same pattern to other mallocs in this method
and the helper functions (validate allocations and helper results, free any
allocated memory on error, and avoid proceeding when inputs are invalid).

}

std::shared_ptr<QnnDeviceAndContext> QnnAOTEnv::createContext(const std::string& name) {
std::shared_ptr<QnnDeviceAndContext> QnnAOTEnv::createContext(const std::string& name, bool weights_sharing) {
std::shared_ptr<QnnDeviceAndContext> context = std::make_shared<QnnDeviceAndContext>();
context->name_ = name;

Expand All @@ -134,10 +184,9 @@ std::shared_ptr<QnnDeviceAndContext> QnnAOTEnv::createContext(const std::string&
// clang-format on

// 2. Create HTP Device
// FIXME(wch): we need to model each Hexagon machine with its special device info.
// clang-format off
if (nullptr != qnn_htp_func_symbols_.qnn_interface_.deviceCreate) {
auto status = qnn_htp_func_symbols_.qnn_interface_.deviceCreate(context->log_, nullptr, &context->device_handle_);
auto status = qnn_htp_func_symbols_.qnn_interface_.deviceCreate(context->log_, target_machine_qnn_config_ptrs_.data(), &context->device_handle_);
MLLM_RT_ASSERT_EQ(status, QNN_SUCCESS);
}
// clang-format on
Expand All @@ -151,6 +200,9 @@ std::shared_ptr<QnnDeviceAndContext> QnnAOTEnv::createContext(const std::string&

// 4. Create Context
{
auto cfgs = createContextCustomConfig(weights_sharing);
// Current not support
MLLM_RT_ASSERT_EQ(cfgs.size(), 0);
auto status = qnn_htp_func_symbols_.qnn_interface_.contextCreate(context->bk_handle_, context->device_handle_,
Comment on lines +203 to 206
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Clarify or remove dead code path for context custom config.

The cfgs variable is computed but immediately asserted to be empty, making the computation unnecessary. Either remove this code if weight sharing context config isn't supported yet, or wire cfgs into contextCreate. The current code is confusing.

If weight sharing config is not yet supported, consider:

-    auto cfgs = createContextCustomConfig(weights_sharing);
-    // Current not support
-    MLLM_RT_ASSERT_EQ(cfgs.size(), 0);
+    // TODO: Wire context custom config when weight sharing support is added
+    MLLM_RT_ASSERT_EQ(weights_sharing, false);  // Weight sharing not yet supported
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
auto cfgs = createContextCustomConfig(weights_sharing);
// Current not support
MLLM_RT_ASSERT_EQ(cfgs.size(), 0);
auto status = qnn_htp_func_symbols_.qnn_interface_.contextCreate(context->bk_handle_, context->device_handle_,
// TODO: Wire context custom config when weight sharing support is added
MLLM_RT_ASSERT_EQ(weights_sharing, false); // Weight sharing not yet supported
auto status = qnn_htp_func_symbols_.qnn_interface_.contextCreate(context->bk_handle_, context->device_handle_,
🤖 Prompt for AI Agents
mllm/backends/qnn/aot/QnnWrappersAPI.cpp lines 203-206: The code computes cfgs
via createContextCustomConfig(weights_sharing) then immediately asserts
cfgs.size() == 0, which makes the computation dead code and confusing; either
remove the cfgs computation and the assert if custom config isn't supported yet,
or wire cfgs into the subsequent contextCreate call (pass cfgs data/size to the
API) and remove the assert. If you choose to keep the unsupported path, replace
the assert with a clear TODO comment and remove the unused variable to avoid
confusion.

(const QnnContext_Config_t**)&context->qnn_context_config_,
&context->qnn_ctx_handle_);
Expand Down Expand Up @@ -196,4 +248,76 @@ void QnnAOTEnv::destroyContext(const std::string& name) {
// TODO
}

std::vector<QnnDevice_PlatformInfo_t*> QnnAOTEnv::createDevicePlatformInfo() {
std::vector<QnnDevice_PlatformInfo_t*> ret;
QnnDevice_PlatformInfo_t* p_platform_info = nullptr;
QnnDevice_HardwareDeviceInfo_t* p_hw_device_info = nullptr;
QnnHtpDevice_DeviceInfoExtension_t* p_device_info_extension = nullptr;
QnnDevice_CoreInfo_t* p_core_info = nullptr;

p_platform_info = (QnnDevice_PlatformInfo_t*)malloc(sizeof(QnnDevice_PlatformInfo_t));
unreachable_handle_.push_back(p_platform_info);
p_platform_info->version = QNN_DEVICE_PLATFORM_INFO_VERSION_1;
p_platform_info->v1.numHwDevices = 1;

p_hw_device_info = (QnnDevice_HardwareDeviceInfo_t*)malloc(sizeof(QnnDevice_HardwareDeviceInfo_t));
unreachable_handle_.push_back(p_hw_device_info);
p_hw_device_info->version = QNN_DEVICE_HARDWARE_DEVICE_INFO_VERSION_1;
p_hw_device_info->v1.deviceId = 0;
p_hw_device_info->v1.deviceType = 0;
p_hw_device_info->v1.numCores = 1;

p_device_info_extension = (QnnHtpDevice_DeviceInfoExtension_t*)malloc(sizeof(QnnHtpDevice_DeviceInfoExtension_t));
unreachable_handle_.push_back(p_device_info_extension);
// clang-format off
p_device_info_extension->devType = QNN_HTP_DEVICE_TYPE_ON_CHIP;
p_device_info_extension->onChipDevice.vtcmSize = target_machine_.soc_htp_vtcm_total_memory_size; // in MB
p_device_info_extension->onChipDevice.signedPdSupport = target_machine_.soc_htp_security_pd_session == QcomSecurityPDSession::kHtpSignedPd;
p_device_info_extension->onChipDevice.socModel = static_cast<uint32_t>(target_machine_.soc_htp_chipset);
p_device_info_extension->onChipDevice.arch = static_cast<QnnHtpDevice_Arch_t>(target_machine_.soc_htp_arch);
p_device_info_extension->onChipDevice.dlbcSupport = true;
p_hw_device_info->v1.deviceInfoExtension = p_device_info_extension;
// clang-format on

p_core_info = (QnnDevice_CoreInfo_t*)malloc(sizeof(QnnDevice_CoreInfo_t));
unreachable_handle_.push_back(p_core_info);
p_core_info->version = QNN_DEVICE_CORE_INFO_VERSION_1;
p_core_info->v1.coreId = 0;
p_core_info->v1.coreType = 0;
p_core_info->v1.coreInfoExtension = nullptr;
p_hw_device_info->v1.cores = p_core_info;

p_platform_info->v1.hwDevices = p_hw_device_info;
ret.push_back(p_platform_info);

return ret;
}
Comment on lines +251 to +294
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

Add null checks after all malloc calls.

This method contains four malloc calls (lines 258, 263, 270, 282) without checking if allocation succeeded. If any malloc returns nullptr, the subsequent dereference will cause undefined behavior.

Add null checks after each malloc:

 p_platform_info = (QnnDevice_PlatformInfo_t*)malloc(sizeof(QnnDevice_PlatformInfo_t));
+if (p_platform_info == nullptr) {
+  MLLM_ERROR_EXIT(ExitCode::kCoreError, "Failed to allocate QnnDevice_PlatformInfo_t");
+}
 unreachable_handle_.push_back(p_platform_info);

Apply to all four allocations in this method. As per coding guidelines, proper error handling is essential for production-ready code.

Committable suggestion skipped: line range outside the PR's diff.

🤖 Prompt for AI Agents
In mllm/backends/qnn/aot/QnnWrappersAPI.cpp around lines 251-294, each of the
four malloc calls must be followed by a nullptr check before any dereference or
pushing into unreachable_handle_; if malloc fails, cleanly handle the error by
freeing any prior allocations (and removing them from unreachable_handle_ if
needed), log or propagate an error and return an empty vector (or otherwise
abort construction) instead of continuing; move unreachable_handle_.push_back
calls to after successful allocation and ensure subsequent code never
dereferences a null pointer.


std::vector<QnnDevice_CustomConfig_t> QnnAOTEnv::createDecideCustomConfigInfo() {
std::vector<QnnDevice_CustomConfig_t> ret;

QnnHtpDevice_CustomConfig_t* p_custom_config = (QnnHtpDevice_CustomConfig_t*)malloc(sizeof(QnnHtpDevice_CustomConfig_t));
unreachable_handle_.push_back(p_custom_config);
p_custom_config->option = QNN_HTP_DEVICE_CONFIG_OPTION_SOC;
p_custom_config->socModel = static_cast<uint32_t>(target_machine_.soc_htp_chipset);
ret.push_back(static_cast<QnnDevice_CustomConfig_t>(p_custom_config));

return ret;
}
Comment on lines +296 to +306
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

Add null check after malloc.

Line 299 allocates memory with malloc but doesn't verify the allocation succeeded before dereferencing the pointer.

 QnnHtpDevice_CustomConfig_t* p_custom_config = (QnnHtpDevice_CustomConfig_t*)malloc(sizeof(QnnHtpDevice_CustomConfig_t));
+if (p_custom_config == nullptr) {
+  MLLM_ERROR_EXIT(ExitCode::kCoreError, "Failed to allocate QnnHtpDevice_CustomConfig_t");
+}
 unreachable_handle_.push_back(p_custom_config);
🤖 Prompt for AI Agents
In mllm/backends/qnn/aot/QnnWrappersAPI.cpp around lines 296 to 306, the code
dereferences p_custom_config immediately after malloc without checking for
nullptr; add a null check after the malloc call and handle allocation failure
(e.g., log/error/throw or return empty vector) before using p_custom_config or
pushing it into unreachable_handle_; only proceed to set fields and push the
pointer into unreachable_handle_ and ret if the allocation succeeded.


std::vector<QnnContext_CustomConfig_t> QnnAOTEnv::createContextCustomConfig(bool weights_sharing) {
std::vector<QnnContext_CustomConfig_t> ret;
QnnHtpContext_CustomConfig_t* p_custom_config = nullptr;

if (weights_sharing) {
p_custom_config = (QnnHtpContext_CustomConfig_t*)malloc(sizeof(QnnHtpContext_CustomConfig_t));
unreachable_handle_.push_back(p_custom_config);
p_custom_config->option = QNN_HTP_CONTEXT_CONFIG_OPTION_WEIGHT_SHARING_ENABLED;
p_custom_config->weightSharingEnabled = true;
ret.push_back(static_cast<QnnContext_CustomConfig_t>(p_custom_config));
}

return ret;
}
Comment on lines +308 to +321
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

Add null check after malloc.

Line 313 allocates memory with malloc inside the conditional block but doesn't check if allocation succeeded.

     p_custom_config = (QnnHtpContext_CustomConfig_t*)malloc(sizeof(QnnHtpContext_CustomConfig_t));
+    if (p_custom_config == nullptr) {
+      MLLM_ERROR_EXIT(ExitCode::kCoreError, "Failed to allocate QnnHtpContext_CustomConfig_t");
+    }
     unreachable_handle_.push_back(p_custom_config);
🤖 Prompt for AI Agents
In mllm/backends/qnn/aot/QnnWrappersAPI.cpp around lines 308 to 321, the malloc
call at line 313 may return nullptr; add an immediate null check after malloc
and handle allocation failure by not dereferencing or pushing the pointer: if
malloc returns nullptr, log or handle the error (e.g., return the empty vector
or throw std::bad_alloc) and avoid using p_custom_config, otherwise proceed to
initialize fields, push to unreachable_handle_, and push to ret.


} // namespace mllm::qnn::aot
24 changes: 21 additions & 3 deletions mllm/backends/qnn/aot/QnnWrappersAPI.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,13 @@
#include <unordered_map>

#include <QNN/QnnCommon.h>
#include <QNN/QnnContext.h>
#include <QNN/QnnInterface.h>
#include <QNN/QnnSdkBuildId.h>
#include <QNN/HTP/QnnHtpDevice.h>
#include <QNN/System/QnnSystemInterface.h>

#include "mllm/backends/qnn/aot/QnnTargetMachine.hpp"
#include "mllm/utils/Common.hpp"

namespace mllm::qnn::aot {
Expand Down Expand Up @@ -97,21 +99,37 @@ class QnnAOTEnv {
public:
using ptr_t = std::shared_ptr<QnnAOTEnv>;

QnnAOTEnv();
explicit QnnAOTEnv(QcomTargetMachine& target_machine);

explicit QnnAOTEnv(const std::string& lib_path);
QnnAOTEnv(const std::string& lib_path, QcomTargetMachine& target_machine);
Comment on lines +102 to +104
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Potential lifetime issue: storing reference parameter as member.

The constructors accept QcomTargetMachine& by reference and store it as member target_machine_ (line 123). If the caller's QcomTargetMachine object is destroyed before the QnnAOTEnv instance, the stored reference becomes dangling, leading to undefined behavior.

Consider one of these alternatives:

  • Store by value (copy/move the target machine)
  • Use std::shared_ptr<QcomTargetMachine> for shared ownership
  • Take const QcomTargetMachine& if the object is only read (requires verification in implementation)
  • At minimum, document the lifetime requirement clearly in comments


std::shared_ptr<QnnDeviceAndContext> createContext(const std::string& name);
std::shared_ptr<QnnDeviceAndContext> createContext(const std::string& name, bool weights_sharing = false);

void saveContext(const std::string& name, const std::string& path);

void destroyContext(const std::string& name);

// This is for All PUs, such as CPU, GPU, NPU
std::vector<QnnDevice_PlatformInfo_t*> createDevicePlatformInfo();

// This function is for NPU only.
std::vector<QnnDevice_CustomConfig_t> createDecideCustomConfigInfo();

std::vector<QnnContext_CustomConfig_t> createContextCustomConfig(bool weights_sharing);
Comment on lines +112 to +118
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

Add destructor to clean up dynamically allocated memory.

These methods allocate memory (via malloc in the implementation) and store pointers in unreachable_handle_ for lifetime management. However, there is no destructor declared or implemented to free this memory, causing a leak.

Additionally, the name createDecideCustomConfigInfo is unclear - consider renaming to createDeviceCustomConfigInfo or similar.

Add destructor declaration in the header:

  QnnAOTEnv(const std::string& lib_path, QcomTargetMachine& target_machine);

+  ~QnnAOTEnv();
+
  std::shared_ptr<QnnDeviceAndContext> createContext(const std::string& name, bool weights_sharing = false);

And rename the method for clarity:

- std::vector<QnnDevice_CustomConfig_t> createDecideCustomConfigInfo();
+ std::vector<QnnDevice_CustomConfig_t> createDeviceCustomConfigInfo();
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
// This is for All PUs, such as CPU, GPU, NPU
std::vector<QnnDevice_PlatformInfo_t*> createDevicePlatformInfo();
// This function is for NPU only.
std::vector<QnnDevice_CustomConfig_t> createDecideCustomConfigInfo();
std::vector<QnnContext_CustomConfig_t> createContextCustomConfig(bool weights_sharing);
// This is for All PUs, such as CPU, GPU, NPU
std::vector<QnnDevice_PlatformInfo_t*> createDevicePlatformInfo();
// This function is for NPU only.
std::vector<QnnDevice_CustomConfig_t> createDeviceCustomConfigInfo();
std::vector<QnnContext_CustomConfig_t> createContextCustomConfig(bool weights_sharing);


private:
void _setup(const std::string& path = "");

QcomTargetMachine target_machine_;
Comment thread
coderabbitai[bot] marked this conversation as resolved.
QnnFuncSymbols qnn_htp_func_symbols_;
std::unordered_map<std::string, std::shared_ptr<QnnDeviceAndContext>> contexts_;

// device config for all to use
std::vector<QnnDevice_Config_t> target_machine_qnn_config_;
std::vector<const QnnDevice_Config_t*> target_machine_qnn_config_ptrs_;

// void* handle that should be freed when QnnAOTEnv end
std::vector<void*> unreachable_handle_;
};

} // namespace mllm::qnn::aot
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
4 changes: 3 additions & 1 deletion mllm/compile/ir/GeneratedRTTIKind.hpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Auto generated: 2025-11-26 11:54:51
// Auto generated: 2025-12-19 07:36:12
// do not modify this file
#pragma once

Expand Down Expand Up @@ -133,6 +133,8 @@ enum NodeKind : uint32_t {
RK_Val_Last,
RK_Attr,
RK_Attr_LinalgIRAttr,
RK_Attr_LinalgIRAttr_QuantizationAnnotation,
RK_Attr_LinalgIRAttr_Last,
RK_Attr_GraphIRAttr,
RK_Attr_TensorIRAttr,
RK_Attr_BuiltinIRAttr,
Expand Down
8 changes: 6 additions & 2 deletions mllm/compile/ir/NodeRTTIClassOfImpl.hpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Auto generated: 2025-11-26 11:54:51
// Auto generated: 2025-12-19 07:36:12
// do not modify this file
#pragma once
namespace mllm::ir {
Expand Down Expand Up @@ -325,7 +325,11 @@ struct NodeRTTIClassOfImpl {
#define RTTI_RK_ATTR_IMPL(v) return (v)->getKind() >= RK_Attr && (v)->getKind() <= RK_Attr_Last

#define RTTI_RK_ATTR_LINALGIRATTR_IMPL(v) \
return (v)->getKind() >= RK_Attr_LinalgIRAttr && (v)->getKind() <= RK_Attr_LinalgIRAttr
return (v)->getKind() >= RK_Attr_LinalgIRAttr && (v)->getKind() <= RK_Attr_LinalgIRAttr_Last

#define RTTI_RK_ATTR_LINALGIRATTR_QUANTIZATIONANNOTATION_IMPL(v) \
return (v)->getKind() >= RK_Attr_LinalgIRAttr_QuantizationAnnotation \
&& (v)->getKind() <= RK_Attr_LinalgIRAttr_QuantizationAnnotation

#define RTTI_RK_ATTR_GRAPHIRATTR_IMPL(v) return (v)->getKind() >= RK_Attr_GraphIRAttr && (v)->getKind() <= RK_Attr_GraphIRAttr

Expand Down
21 changes: 21 additions & 0 deletions mllm/compile/ir/linalg/Attribute.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
// Copyright (c) MLLM Team.
// Licensed under the MIT License.

#include "mllm/compile/ir/linalg/Attribute.hpp"

namespace mllm::ir::linalg {

// Out-of-line defaulted destructor (defined here rather than in the header).
LinalgIRAttr::~LinalgIRAttr() = default;

// Default construction tags the attribute with the LinalgIRAttr RTTI kind.
LinalgIRAttr::LinalgIRAttr() : Attr(RK_Attr_LinalgIRAttr) {}

// Lets derived attributes forward their own (more specific) RTTI kind.
LinalgIRAttr::LinalgIRAttr(const NodeKind& kind) : Attr(kind) {}

// NOTE(review): class name is spelled "Quantizaton" (missing an 'i');
// renaming to LinalgIRQuantizationAnnotationAttr requires a coordinated
// codebase-wide change (header, usages, generated RTTI), so it is only
// flagged here.
LinalgIRQuantizatonAnnotationAttr::~LinalgIRQuantizatonAnnotationAttr() = default;

// Default construction tags the attribute with the QuantizationAnnotation kind.
LinalgIRQuantizatonAnnotationAttr::LinalgIRQuantizatonAnnotationAttr()
    : LinalgIRAttr(RK_Attr_LinalgIRAttr_QuantizationAnnotation) {}

// Lets further-derived attributes forward their own RTTI kind.
LinalgIRQuantizatonAnnotationAttr::LinalgIRQuantizatonAnnotationAttr(const NodeKind& kind) : LinalgIRAttr(kind) {}
Comment on lines +14 to +19
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Typo in class name: LinalgIRQuantizatonAnnotationAttrLinalgIRQuantizationAnnotationAttr.

The class name is missing an 'i' in "Quantization". This typo appears throughout the codebase (header and implementation) and should be corrected for clarity.

🔎 Proposed fix
-LinalgIRQuantizatonAnnotationAttr::~LinalgIRQuantizatonAnnotationAttr() = default;
+LinalgIRQuantizationAnnotationAttr::~LinalgIRQuantizationAnnotationAttr() = default;

-LinalgIRQuantizatonAnnotationAttr::LinalgIRQuantizatonAnnotationAttr()
+LinalgIRQuantizationAnnotationAttr::LinalgIRQuantizationAnnotationAttr()
     : LinalgIRAttr(RK_Attr_LinalgIRAttr_QuantizationAnnotation) {}

-LinalgIRQuantizatonAnnotationAttr::LinalgIRQuantizatonAnnotationAttr(const NodeKind& kind) : LinalgIRAttr(kind) {}
+LinalgIRQuantizationAnnotationAttr::LinalgIRQuantizationAnnotationAttr(const NodeKind& kind) : LinalgIRAttr(kind) {}

Committable suggestion skipped: line range outside the PR's diff.

🤖 Prompt for AI Agents
In mllm/compile/ir/linalg/Attribute.cpp around lines 14 to 19 the class and
constructors are misspelled as LinalgIRQuantizatonAnnotationAttr (missing 'i' in
"Quantization"); rename the class and all its constructors/destructor to
LinalgIRQuantizationAnnotationAttr, update the corresponding declaration in the
header, adjust every usage/forward-declaration/typedef/registration across the
codebase (search-and-replace the old spelling), regenerate any generated
bindings or CMake targets if names are referenced, and rebuild to ensure all
references are corrected.


} // namespace mllm::ir::linalg
Loading