Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions examples/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,15 @@ add_subdirectory(minicpm4)
add_subdirectory(qwen3)
add_subdirectory(qwen3_service)
add_subdirectory(deepseek_ocr)

if(MLLM_BUILD_QNN_BACKEND)
add_subdirectory(qwen_npu)
endif()

if(MLLM_TRACY_ENABLE)
add_subdirectory(tracy_example)
endif()

if(MLLM_QUALCOMM_QNN_AOT_ON_X86_ENABLE)
add_subdirectory(qwen3_qnn_aot)
endif()
Comment on lines +20 to +22
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
# Verify that qwen3_qnn_aot directory exists and contains CMakeLists.txt

if [ -d "examples/qwen3_qnn_aot" ]; then
  echo "✓ Directory examples/qwen3_qnn_aot exists"
  if [ -f "examples/qwen3_qnn_aot/CMakeLists.txt" ]; then
    echo "✓ CMakeLists.txt found in examples/qwen3_qnn_aot"
    wc -l examples/qwen3_qnn_aot/CMakeLists.txt
  else
    echo "✗ CMakeLists.txt NOT found in examples/qwen3_qnn_aot"
    exit 1
  fi
else
  echo "✗ Directory examples/qwen3_qnn_aot does NOT exist"
  exit 1
fi

Repository: UbiquitousLearning/mllm

Length of output: 197


The qwen3_qnn_aot/CMakeLists.txt file is empty. This will cause build failures or undefined behavior when add_subdirectory() is called. The file must be populated with the appropriate CMake configuration before this conditional block can be enabled.

Empty file.
8 changes: 6 additions & 2 deletions mllm/backends/cpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,10 @@ if(MLLM_BUILD_ARM_BACKEND)
PATTERN "*.h"
PATTERN "*.hpp")
else()
# X86 highway
# TODO
install(
TARGETS hwy
EXPORT MllmTargets
LIBRARY DESTINATION lib
ARCHIVE DESTINATION lib
RUNTIME DESTINATION bin)
endif()
67 changes: 67 additions & 0 deletions mllm/backends/qnn/aot/QnnTargetMachine.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
// Copyright (c) MLLM Team.
// Licensed under the MIT License.

#pragma once

#include <cstdint>

namespace mllm::qnn::aot {

// Hexagon Tensor Processor (HTP) architecture generation of the target SoC.
// Numeric values mirror the HTP architecture version number (e.g. V75 == 75).
enum class QcomHTPArch : uint32_t {
  NONE = 0,  // Architecture unknown / not specified.
  V68 = 68,
  V69 = 69,
  V73 = 73,
  V75 = 75,
  V79 = 79,
  V81 = 81,
};

// Qualcomm SoC (chipset) identifiers for supported target devices.
// NOTE(review): the numeric values look like QNN soc-model IDs (they are fed
// into QnnHtpDevice on-chip device info via static_cast<uint32_t>) — confirm
// against the QNN SDK's soc model table before adding new entries.
enum QcomChipset : uint32_t {
  UNKNOWN_SM = 0,  // Chipset not known / not specified.
  SA8295 = 39,
  SM8350 = 35,
  SM8450 = 36,
  SM8475 = 42,
  SM8550 = 43,
  SM8650 = 57,
  SM8750 = 69,
  SM8850 = 87,
  SSG2115P = 46,
  SSG2125P = 58,
  SXR1230P = 45,
  SXR2230P = 53,
  SXR2330P = 75,
  QCS9100 = 77,
  SAR2230P = 95,
  SA8255 = 52,
  SW6100 = 96,
};

// HTP performance profile requested for the target machine.
// NOTE(review): presumably these map onto the HTP perf-infrastructure power
// modes — confirm against the QNN HTP performance API before relying on the
// exact semantics of each level.
enum QcomTryBestPerformance : uint32_t {
  kHtpDefault = 0,  // Backend default behavior.
  kHtpSustainedHighPerformance,
  kHtpBurst,
  kHtpHighPerformance,
  kHtpPowerSaver,
  kHtpLowPowerSaver,
  kHtpHighPowerSaver,
  kHtpLowBalanced,
  kHtpBalanced,
};

// Protection Domain (PD) Session type used when opening the HTP device.
enum QcomSecurityPDSession : uint32_t {
  kHtpUnsignedPd = 0,  // Run in an unsigned process domain (default).
  kHtpSignedPd,        // Request a signed process domain.
};

// Aggregated description of the Qualcomm target machine used to configure the
// QNN HTP device (chipset, HTP arch, performance profile, PD session, VTCM).
struct QcomTargetMachine {
  QcomChipset soc_htp_chipset;                        // Target SoC model.
  QcomHTPArch soc_htp_arch;                           // HTP architecture generation.
  QcomTryBestPerformance soc_htp_performance;         // Requested performance profile.
  QcomSecurityPDSession soc_htp_security_pd_session;  // Signed vs. unsigned PD session.
  uint32_t soc_htp_vtcm_total_memory_size;            // Total VTCM size, in MB.
};
Comment thread
chenghuaWang marked this conversation as resolved.

} // namespace mllm::qnn::aot
134 changes: 129 additions & 5 deletions mllm/backends/qnn/aot/QnnWrappersAPI.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
// Copyright (c) MLLM Team.
// Licensed under the MIT License.
#include <QNN/HTP/QnnHtpDevice.h>
#include <QNN/HTP/QnnHtpCommon.h>
#include <QNN/HTP/QnnHtpContext.h>

#include "QnnContext.h"
#include "mllm/utils/Common.hpp"
#include "mllm/backends/qnn/aot/QnnWrappersAPI.hpp"
#include "mllm/backends/qnn/aot/QnnTargetMachine.hpp"

namespace mllm::qnn::aot {

Expand Down Expand Up @@ -67,9 +74,11 @@ bool QnnDynSymbolLoader::loadQnnDynLibAtPath(const std::string& path, const std:
return false;
}

QnnAOTEnv::QnnAOTEnv() { _setup(); }
QnnAOTEnv::QnnAOTEnv(QcomTargetMachine& target_machine) : target_machine_(target_machine) { _setup(); }

QnnAOTEnv::QnnAOTEnv(const std::string& lib_path) { _setup(lib_path); }
QnnAOTEnv::QnnAOTEnv(const std::string& lib_path, QcomTargetMachine& target_machine) : target_machine_(target_machine) {
_setup(lib_path);
}

void QnnAOTEnv::_setup(const std::string& path) {
auto& loader = QnnDynSymbolLoader::instance();
Expand Down Expand Up @@ -121,9 +130,50 @@ void QnnAOTEnv::_setup(const std::string& path) {

MLLM_RT_ASSERT(status != QNN_PROPERTY_ERROR_UNKNOWN_KEY);
}

// Try to config this target machine
{
auto device_custom_config = createDecideCustomConfigInfo();
QnnHtpDevice_CustomConfig_t* p_custom_config = nullptr;

switch (target_machine_.soc_htp_security_pd_session) {
case QcomSecurityPDSession::kHtpSignedPd: {
p_custom_config = (QnnHtpDevice_CustomConfig_t*)malloc(sizeof(QnnHtpDevice_CustomConfig_t));
unreachable_handle_.push_back(p_custom_config);
p_custom_config->option = QNN_HTP_DEVICE_CONFIG_OPTION_SIGNEDPD;
p_custom_config->useSignedProcessDomain.useSignedProcessDomain = true;
p_custom_config->useSignedProcessDomain.deviceId = 0;
device_custom_config.push_back(static_cast<QnnDevice_CustomConfig_t>(p_custom_config));
break;
}
case QcomSecurityPDSession::kHtpUnsignedPd:
default: break;
}

const std::vector<QnnDevice_PlatformInfo_t*> device_platform_info = createDevicePlatformInfo();
uint32_t num_custom_configs = device_platform_info.size() + device_custom_config.size();
target_machine_qnn_config_.resize(num_custom_configs);

for (std::size_t i = 0; i < device_custom_config.size(); ++i) {
target_machine_qnn_config_[i].option = QNN_DEVICE_CONFIG_OPTION_CUSTOM;
target_machine_qnn_config_[i].customConfig = device_custom_config[i];
target_machine_qnn_config_ptrs_.push_back(&target_machine_qnn_config_[i]);
}

if (!device_platform_info.empty()) {
// The length of platform info can only be 1.
MLLM_RT_ASSERT_EQ(device_platform_info.size(), 1u);
target_machine_qnn_config_[device_custom_config.size()].option = QNN_DEVICE_CONFIG_OPTION_PLATFORM_INFO;
target_machine_qnn_config_[device_custom_config.size()].hardwareInfo = device_platform_info.back();
target_machine_qnn_config_ptrs_.push_back(&target_machine_qnn_config_[device_custom_config.size()]);
}

// null terminated
target_machine_qnn_config_ptrs_.push_back(nullptr);
}
Comment on lines +134 to +173
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

Add null checks after malloc calls and improve error handling.

Lines 141-146 and throughout this section allocate memory with malloc but don't check if allocation succeeded. If malloc returns nullptr, dereferencing it causes undefined behavior.

Additionally, the code assumes createDevicePlatformInfo() returns exactly 1 element (line 165), but there's no error handling if it returns an unexpected result before the assertion.

Add null checks after all malloc calls:

 p_custom_config = (QnnHtpDevice_CustomConfig_t*)malloc(sizeof(QnnHtpDevice_CustomConfig_t));
+if (p_custom_config == nullptr) {
+  MLLM_ERROR_EXIT(ExitCode::kCoreError, "Failed to allocate QnnHtpDevice_CustomConfig_t");
+}
 unreachable_handle_.push_back(p_custom_config);

Apply similar checks to all other malloc calls in this method and in the helper methods (createDevicePlatformInfo, createDecideCustomConfigInfo, createContextCustomConfig).

🤖 Prompt for AI Agents
In mllm/backends/qnn/aot/QnnWrappersAPI.cpp around lines 134-173, add robust
null-checks and error paths for all dynamic allocations and validate helper
outputs: after every malloc (e.g. the QnnHtpDevice_CustomConfig_t allocation)
check for nullptr and on failure log an error, clean up any previously allocated
resources in unreachable_handle_ and target_machine_qnn_config_ptrs_, and
return/propagate an error instead of dereferencing; likewise check the return
vectors from createDecideCustomConfigInfo(), createDevicePlatformInfo() and
other helpers before using them (do not rely solely on the assert) — if
createDevicePlatformInfo() returns an unexpected size, log/error and perform
cleanup and early exit; apply the same pattern to other mallocs in this method
and the helper functions (validate allocations and helper results, free any
allocated memory on error, and avoid proceeding when inputs are invalid).

}

std::shared_ptr<QnnDeviceAndContext> QnnAOTEnv::createContext(const std::string& name) {
std::shared_ptr<QnnDeviceAndContext> QnnAOTEnv::createContext(const std::string& name, bool weights_sharing) {
std::shared_ptr<QnnDeviceAndContext> context = std::make_shared<QnnDeviceAndContext>();
context->name_ = name;

Expand All @@ -134,10 +184,9 @@ std::shared_ptr<QnnDeviceAndContext> QnnAOTEnv::createContext(const std::string&
// clang-format on

// 2. Create HTP Device
// FIXME(wch): we need to model each Hexagon machine with its special device info.
// clang-format off
if (nullptr != qnn_htp_func_symbols_.qnn_interface_.deviceCreate) {
auto status = qnn_htp_func_symbols_.qnn_interface_.deviceCreate(context->log_, nullptr, &context->device_handle_);
auto status = qnn_htp_func_symbols_.qnn_interface_.deviceCreate(context->log_, target_machine_qnn_config_ptrs_.data(), &context->device_handle_);
MLLM_RT_ASSERT_EQ(status, QNN_SUCCESS);
}
// clang-format on
Expand All @@ -151,6 +200,9 @@ std::shared_ptr<QnnDeviceAndContext> QnnAOTEnv::createContext(const std::string&

// 4. Create Context
{
auto cfgs = createContextCustomConfig(weights_sharing);
// Current not support
MLLM_RT_ASSERT_EQ(cfgs.size(), 0);
auto status = qnn_htp_func_symbols_.qnn_interface_.contextCreate(context->bk_handle_, context->device_handle_,
Comment on lines +203 to 206
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Clarify or remove dead code path for context custom config.

The cfgs variable is computed but immediately asserted to be empty, making the computation unnecessary. Either remove this code if weight sharing context config isn't supported yet, or wire cfgs into contextCreate. The current code is confusing.

If weight sharing config is not yet supported, consider:

-    auto cfgs = createContextCustomConfig(weights_sharing);
-    // Current not support
-    MLLM_RT_ASSERT_EQ(cfgs.size(), 0);
+    // TODO: Wire context custom config when weight sharing support is added
+    MLLM_RT_ASSERT_EQ(weights_sharing, false);  // Weight sharing not yet supported
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
auto cfgs = createContextCustomConfig(weights_sharing);
// Current not support
MLLM_RT_ASSERT_EQ(cfgs.size(), 0);
auto status = qnn_htp_func_symbols_.qnn_interface_.contextCreate(context->bk_handle_, context->device_handle_,
// TODO: Wire context custom config when weight sharing support is added
MLLM_RT_ASSERT_EQ(weights_sharing, false); // Weight sharing not yet supported
auto status = qnn_htp_func_symbols_.qnn_interface_.contextCreate(context->bk_handle_, context->device_handle_,
🤖 Prompt for AI Agents
mllm/backends/qnn/aot/QnnWrappersAPI.cpp lines 203-206: The code computes cfgs
via createContextCustomConfig(weights_sharing) then immediately asserts
cfgs.size() == 0, which makes the computation dead code and confusing; either
remove the cfgs computation and the assert if custom config isn't supported yet,
or wire cfgs into the subsequent contextCreate call (pass cfgs data/size to the
API) and remove the assert. If you choose to keep the unsupported path, replace
the assert with a clear TODO comment and remove the unused variable to avoid
confusion.

(const QnnContext_Config_t**)&context->qnn_context_config_,
&context->qnn_ctx_handle_);
Expand Down Expand Up @@ -196,4 +248,76 @@ void QnnAOTEnv::destroyContext(const std::string& name) {
// TODO
}

std::vector<QnnDevice_PlatformInfo_t*> QnnAOTEnv::createDevicePlatformInfo() {
std::vector<QnnDevice_PlatformInfo_t*> ret;
QnnDevice_PlatformInfo_t* p_platform_info = nullptr;
QnnDevice_HardwareDeviceInfo_t* p_hw_device_info = nullptr;
QnnHtpDevice_DeviceInfoExtension_t* p_device_info_extension = nullptr;
QnnDevice_CoreInfo_t* p_core_info = nullptr;

p_platform_info = (QnnDevice_PlatformInfo_t*)malloc(sizeof(QnnDevice_PlatformInfo_t));
unreachable_handle_.push_back(p_platform_info);
p_platform_info->version = QNN_DEVICE_PLATFORM_INFO_VERSION_1;
p_platform_info->v1.numHwDevices = 1;

p_hw_device_info = (QnnDevice_HardwareDeviceInfo_t*)malloc(sizeof(QnnDevice_HardwareDeviceInfo_t));
unreachable_handle_.push_back(p_hw_device_info);
p_hw_device_info->version = QNN_DEVICE_HARDWARE_DEVICE_INFO_VERSION_1;
p_hw_device_info->v1.deviceId = 0;
p_hw_device_info->v1.deviceType = 0;
p_hw_device_info->v1.numCores = 1;

p_device_info_extension = (QnnHtpDevice_DeviceInfoExtension_t*)malloc(sizeof(QnnHtpDevice_DeviceInfoExtension_t));
unreachable_handle_.push_back(p_device_info_extension);
// clang-format off
p_device_info_extension->devType = QNN_HTP_DEVICE_TYPE_ON_CHIP;
p_device_info_extension->onChipDevice.vtcmSize = target_machine_.soc_htp_vtcm_total_memory_size; // in MB
p_device_info_extension->onChipDevice.signedPdSupport = target_machine_.soc_htp_security_pd_session == QcomSecurityPDSession::kHtpSignedPd;
p_device_info_extension->onChipDevice.socModel = static_cast<uint32_t>(target_machine_.soc_htp_chipset);
p_device_info_extension->onChipDevice.arch = static_cast<QnnHtpDevice_Arch_t>(target_machine_.soc_htp_arch);
p_device_info_extension->onChipDevice.dlbcSupport = true;
p_hw_device_info->v1.deviceInfoExtension = p_device_info_extension;
// clang-format on

p_core_info = (QnnDevice_CoreInfo_t*)malloc(sizeof(QnnDevice_CoreInfo_t));
unreachable_handle_.push_back(p_core_info);
p_core_info->version = QNN_DEVICE_CORE_INFO_VERSION_1;
p_core_info->v1.coreId = 0;
p_core_info->v1.coreType = 0;
p_core_info->v1.coreInfoExtension = nullptr;
p_hw_device_info->v1.cores = p_core_info;

p_platform_info->v1.hwDevices = p_hw_device_info;
ret.push_back(p_platform_info);

return ret;
}
Comment on lines +251 to +294
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

Add null checks after all malloc calls.

This method contains four malloc calls (lines 258, 263, 270, 282) without checking if allocation succeeded. If any malloc returns nullptr, the subsequent dereference will cause undefined behavior.

Add null checks after each malloc:

 p_platform_info = (QnnDevice_PlatformInfo_t*)malloc(sizeof(QnnDevice_PlatformInfo_t));
+if (p_platform_info == nullptr) {
+  MLLM_ERROR_EXIT(ExitCode::kCoreError, "Failed to allocate QnnDevice_PlatformInfo_t");
+}
 unreachable_handle_.push_back(p_platform_info);

Apply to all four allocations in this method. As per coding guidelines, proper error handling is essential for production-ready code.

Committable suggestion skipped: line range outside the PR's diff.

🤖 Prompt for AI Agents
In mllm/backends/qnn/aot/QnnWrappersAPI.cpp around lines 251-294, each of the
four malloc calls must be followed by a nullptr check before any dereference or
pushing into unreachable_handle_; if malloc fails, cleanly handle the error by
freeing any prior allocations (and removing them from unreachable_handle_ if
needed), log or propagate an error and return an empty vector (or otherwise
abort construction) instead of continuing; move unreachable_handle_.push_back
calls to after successful allocation and ensure subsequent code never
dereferences a null pointer.


std::vector<QnnDevice_CustomConfig_t> QnnAOTEnv::createDecideCustomConfigInfo() {
std::vector<QnnDevice_CustomConfig_t> ret;

QnnHtpDevice_CustomConfig_t* p_custom_config = (QnnHtpDevice_CustomConfig_t*)malloc(sizeof(QnnHtpDevice_CustomConfig_t));
unreachable_handle_.push_back(p_custom_config);
p_custom_config->option = QNN_HTP_DEVICE_CONFIG_OPTION_SOC;
p_custom_config->socModel = static_cast<uint32_t>(target_machine_.soc_htp_chipset);
ret.push_back(static_cast<QnnDevice_CustomConfig_t>(p_custom_config));

return ret;
}
Comment on lines +296 to +306
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

Add null check after malloc.

Line 299 allocates memory with malloc but doesn't verify the allocation succeeded before dereferencing the pointer.

 QnnHtpDevice_CustomConfig_t* p_custom_config = (QnnHtpDevice_CustomConfig_t*)malloc(sizeof(QnnHtpDevice_CustomConfig_t));
+if (p_custom_config == nullptr) {
+  MLLM_ERROR_EXIT(ExitCode::kCoreError, "Failed to allocate QnnHtpDevice_CustomConfig_t");
+}
 unreachable_handle_.push_back(p_custom_config);
🤖 Prompt for AI Agents
In mllm/backends/qnn/aot/QnnWrappersAPI.cpp around lines 296 to 306, the code
dereferences p_custom_config immediately after malloc without checking for
nullptr; add a null check after the malloc call and handle allocation failure
(e.g., log/error/throw or return empty vector) before using p_custom_config or
pushing it into unreachable_handle_; only proceed to set fields and push the
pointer into unreachable_handle_ and ret if the allocation succeeded.


std::vector<QnnContext_CustomConfig_t> QnnAOTEnv::createContextCustomConfig(bool weights_sharing) {
std::vector<QnnContext_CustomConfig_t> ret;
QnnHtpContext_CustomConfig_t* p_custom_config = nullptr;

if (weights_sharing) {
p_custom_config = (QnnHtpContext_CustomConfig_t*)malloc(sizeof(QnnHtpContext_CustomConfig_t));
unreachable_handle_.push_back(p_custom_config);
p_custom_config->option = QNN_HTP_CONTEXT_CONFIG_OPTION_WEIGHT_SHARING_ENABLED;
p_custom_config->weightSharingEnabled = true;
ret.push_back(static_cast<QnnContext_CustomConfig_t>(p_custom_config));
}

return ret;
}
Comment on lines +308 to +321
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

Add null check after malloc.

Line 313 allocates memory with malloc inside the conditional block but doesn't check if allocation succeeded.

     p_custom_config = (QnnHtpContext_CustomConfig_t*)malloc(sizeof(QnnHtpContext_CustomConfig_t));
+    if (p_custom_config == nullptr) {
+      MLLM_ERROR_EXIT(ExitCode::kCoreError, "Failed to allocate QnnHtpContext_CustomConfig_t");
+    }
     unreachable_handle_.push_back(p_custom_config);
🤖 Prompt for AI Agents
In mllm/backends/qnn/aot/QnnWrappersAPI.cpp around lines 308 to 321, the malloc
call at line 313 may return nullptr; add an immediate null check after malloc
and handle allocation failure by not dereferencing or pushing the pointer: if
malloc returns nullptr, log or handle the error (e.g., return the empty vector
or throw std::bad_alloc) and avoid using p_custom_config, otherwise proceed to
initialize fields, push to unreachable_handle_, and push to ret.


} // namespace mllm::qnn::aot
24 changes: 21 additions & 3 deletions mllm/backends/qnn/aot/QnnWrappersAPI.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,13 @@
#include <unordered_map>

#include <QNN/QnnCommon.h>
#include <QNN/QnnContext.h>
#include <QNN/QnnInterface.h>
#include <QNN/QnnSdkBuildId.h>
#include <QNN/HTP/QnnHtpDevice.h>
#include <QNN/System/QnnSystemInterface.h>

#include "mllm/backends/qnn/aot/QnnTargetMachine.hpp"
#include "mllm/utils/Common.hpp"

namespace mllm::qnn::aot {
Expand Down Expand Up @@ -97,21 +99,37 @@ class QnnAOTEnv {
public:
using ptr_t = std::shared_ptr<QnnAOTEnv>;

QnnAOTEnv();
explicit QnnAOTEnv(QcomTargetMachine& target_machine);

explicit QnnAOTEnv(const std::string& lib_path);
QnnAOTEnv(const std::string& lib_path, QcomTargetMachine& target_machine);
Comment on lines +102 to +104
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Potential lifetime issue: storing reference parameter as member.

The constructors accept QcomTargetMachine& by reference and store it as member target_machine_ (line 123). If the caller's QcomTargetMachine object is destroyed before the QnnAOTEnv instance, the stored reference becomes dangling, leading to undefined behavior.

Consider one of these alternatives:

  • Store by value (copy/move the target machine)
  • Use std::shared_ptr<QcomTargetMachine> for shared ownership
  • Take const QcomTargetMachine& if the object is only read (requires verification in implementation)
  • At minimum, document the lifetime requirement clearly in comments


std::shared_ptr<QnnDeviceAndContext> createContext(const std::string& name);
std::shared_ptr<QnnDeviceAndContext> createContext(const std::string& name, bool weights_sharing = false);

void saveContext(const std::string& name, const std::string& path);

void destroyContext(const std::string& name);

// This is for All PUs, such as CPU, GPU, NPU
std::vector<QnnDevice_PlatformInfo_t*> createDevicePlatformInfo();

// This function is for NPU only.
std::vector<QnnDevice_CustomConfig_t> createDecideCustomConfigInfo();

std::vector<QnnContext_CustomConfig_t> createContextCustomConfig(bool weights_sharing);
Comment on lines +112 to +118
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

Add destructor to clean up dynamically allocated memory.

These methods allocate memory (via malloc in the implementation) and store pointers in unreachable_handle_ for lifetime management. However, there is no destructor declared or implemented to free this memory, causing a leak.

Additionally, the name createDecideCustomConfigInfo is unclear - consider renaming to createDeviceCustomConfigInfo or similar.

Add destructor declaration in the header:

  QnnAOTEnv(const std::string& lib_path, QcomTargetMachine& target_machine);

+  ~QnnAOTEnv();
+
  std::shared_ptr<QnnDeviceAndContext> createContext(const std::string& name, bool weights_sharing = false);

And rename the method for clarity:

- std::vector<QnnDevice_CustomConfig_t> createDecideCustomConfigInfo();
+ std::vector<QnnDevice_CustomConfig_t> createDeviceCustomConfigInfo();
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
// This is for All PUs, such as CPU, GPU, NPU
std::vector<QnnDevice_PlatformInfo_t*> createDevicePlatformInfo();
// This function is for NPU only.
std::vector<QnnDevice_CustomConfig_t> createDecideCustomConfigInfo();
std::vector<QnnContext_CustomConfig_t> createContextCustomConfig(bool weights_sharing);
// This is for All PUs, such as CPU, GPU, NPU
std::vector<QnnDevice_PlatformInfo_t*> createDevicePlatformInfo();
// This function is for NPU only.
std::vector<QnnDevice_CustomConfig_t> createDeviceCustomConfigInfo();
std::vector<QnnContext_CustomConfig_t> createContextCustomConfig(bool weights_sharing);


private:
void _setup(const std::string& path = "");

QcomTargetMachine target_machine_;
Comment thread
coderabbitai[bot] marked this conversation as resolved.
QnnFuncSymbols qnn_htp_func_symbols_;
std::unordered_map<std::string, std::shared_ptr<QnnDeviceAndContext>> contexts_;

// device config for all to use
std::vector<QnnDevice_Config_t> target_machine_qnn_config_;
std::vector<const QnnDevice_Config_t*> target_machine_qnn_config_ptrs_;

// void* handle that should be freed when QnnAOTEnv end
std::vector<void*> unreachable_handle_;
};

} // namespace mllm::qnn::aot
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
4 changes: 3 additions & 1 deletion mllm/compile/ir/GeneratedRTTIKind.hpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Auto generated: 2025-11-26 11:54:51
// Auto generated: 2025-12-19 07:36:12
// do not modify this file
#pragma once

Expand Down Expand Up @@ -133,6 +133,8 @@ enum NodeKind : uint32_t {
RK_Val_Last,
RK_Attr,
RK_Attr_LinalgIRAttr,
RK_Attr_LinalgIRAttr_QuantizationAnnotation,
RK_Attr_LinalgIRAttr_Last,
RK_Attr_GraphIRAttr,
RK_Attr_TensorIRAttr,
RK_Attr_BuiltinIRAttr,
Expand Down
8 changes: 6 additions & 2 deletions mllm/compile/ir/NodeRTTIClassOfImpl.hpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Auto generated: 2025-11-26 11:54:51
// Auto generated: 2025-12-19 07:36:12
// do not modify this file
#pragma once
namespace mllm::ir {
Expand Down Expand Up @@ -325,7 +325,11 @@ struct NodeRTTIClassOfImpl {
#define RTTI_RK_ATTR_IMPL(v) return (v)->getKind() >= RK_Attr && (v)->getKind() <= RK_Attr_Last

#define RTTI_RK_ATTR_LINALGIRATTR_IMPL(v) \
return (v)->getKind() >= RK_Attr_LinalgIRAttr && (v)->getKind() <= RK_Attr_LinalgIRAttr
return (v)->getKind() >= RK_Attr_LinalgIRAttr && (v)->getKind() <= RK_Attr_LinalgIRAttr_Last

#define RTTI_RK_ATTR_LINALGIRATTR_QUANTIZATIONANNOTATION_IMPL(v) \
return (v)->getKind() >= RK_Attr_LinalgIRAttr_QuantizationAnnotation \
&& (v)->getKind() <= RK_Attr_LinalgIRAttr_QuantizationAnnotation

#define RTTI_RK_ATTR_GRAPHIRATTR_IMPL(v) return (v)->getKind() >= RK_Attr_GraphIRAttr && (v)->getKind() <= RK_Attr_GraphIRAttr

Expand Down
21 changes: 21 additions & 0 deletions mllm/compile/ir/linalg/Attribute.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
// Copyright (c) MLLM Team.
// Licensed under the MIT License.

#include "mllm/compile/ir/linalg/Attribute.hpp"

namespace mllm::ir::linalg {

// Out-of-line defaulted destructor (defined here rather than in the header).
LinalgIRAttr::~LinalgIRAttr() = default;

// Default construction tags the attribute with the LinalgIRAttr RTTI kind.
LinalgIRAttr::LinalgIRAttr() : Attr(RK_Attr_LinalgIRAttr) {}

// Lets derived attributes forward their own (more specific) RTTI kind.
LinalgIRAttr::LinalgIRAttr(const NodeKind& kind) : Attr(kind) {}

// NOTE(review): class name is spelled "Quantizaton" (missing an 'i');
// renaming to LinalgIRQuantizationAnnotationAttr requires a coordinated
// codebase-wide change (header, usages, generated RTTI), so it is only
// flagged here.
LinalgIRQuantizatonAnnotationAttr::~LinalgIRQuantizatonAnnotationAttr() = default;

// Default construction tags the attribute with the QuantizationAnnotation kind.
LinalgIRQuantizatonAnnotationAttr::LinalgIRQuantizatonAnnotationAttr()
    : LinalgIRAttr(RK_Attr_LinalgIRAttr_QuantizationAnnotation) {}

// Lets further-derived attributes forward their own RTTI kind.
LinalgIRQuantizatonAnnotationAttr::LinalgIRQuantizatonAnnotationAttr(const NodeKind& kind) : LinalgIRAttr(kind) {}
Comment on lines +14 to +19
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Typo in class name: LinalgIRQuantizatonAnnotationAttrLinalgIRQuantizationAnnotationAttr.

The class name is missing an 'i' in "Quantization". This typo appears throughout the codebase (header and implementation) and should be corrected for clarity.

🔎 Proposed fix
-LinalgIRQuantizatonAnnotationAttr::~LinalgIRQuantizatonAnnotationAttr() = default;
+LinalgIRQuantizationAnnotationAttr::~LinalgIRQuantizationAnnotationAttr() = default;

-LinalgIRQuantizatonAnnotationAttr::LinalgIRQuantizatonAnnotationAttr()
+LinalgIRQuantizationAnnotationAttr::LinalgIRQuantizationAnnotationAttr()
     : LinalgIRAttr(RK_Attr_LinalgIRAttr_QuantizationAnnotation) {}

-LinalgIRQuantizatonAnnotationAttr::LinalgIRQuantizatonAnnotationAttr(const NodeKind& kind) : LinalgIRAttr(kind) {}
+LinalgIRQuantizationAnnotationAttr::LinalgIRQuantizationAnnotationAttr(const NodeKind& kind) : LinalgIRAttr(kind) {}

Committable suggestion skipped: line range outside the PR's diff.

🤖 Prompt for AI Agents
In mllm/compile/ir/linalg/Attribute.cpp around lines 14 to 19 the class and
constructors are misspelled as LinalgIRQuantizatonAnnotationAttr (missing 'i' in
"Quantization"); rename the class and all its constructors/destructor to
LinalgIRQuantizationAnnotationAttr, update the corresponding declaration in the
header, adjust every usage/forward-declaration/typedef/registration across the
codebase (search-and-replace the old spelling), regenerate any generated
bindings or CMake targets if names are referenced, and rebuild to ensure all
references are corrected.


} // namespace mllm::ir::linalg
Loading