feat(qualcomm): Qnn aot runner #603
Changes from all commits: a46b513, 95ddcb8, 42d4aee, 492fa8e, 1b27f17, 4783198, fdb27ea, 34895a5
CMakeLists.txt:

```diff
@@ -1,3 +1,10 @@
-add_executable(mllm-qwen3-aot-c compile.cpp)
-target_link_libraries(mllm-qwen3-aot-c PRIVATE MllmRT MllmCPUBackend MllmQNNBackend)
-target_include_directories(mllm-qwen3-aot-c PRIVATE ${MLLM_INCLUDE_DIR})
+# AOT targets run on x86
+if(MLLM_QUALCOMM_QNN_AOT_ON_X86_ENABLE)
+  add_executable(mllm-qwen3-aot-c compile.cpp)
+  target_link_libraries(mllm-qwen3-aot-c PRIVATE MllmRT MllmCPUBackend MllmQNNBackend)
+  target_include_directories(mllm-qwen3-aot-c PRIVATE ${MLLM_INCLUDE_DIR})
+endif()
+
+add_executable(mllm-qwen3-aot-runner aot_run.cpp)
+target_link_libraries(mllm-qwen3-aot-runner PRIVATE MllmRT MllmCPUBackend MllmQNNBackend)
+target_include_directories(mllm-qwen3-aot-runner PRIVATE ${MLLM_INCLUDE_DIR})
```
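The new guard means the AOT compile target `mllm-qwen3-aot-c` is only built when the project is configured with `-DMLLM_QUALCOMM_QNN_AOT_ON_X86_ENABLE=ON` (the option itself is presumably declared elsewhere in the build), while the new `mllm-qwen3-aot-runner` target is built unconditionally.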
aot_run.cpp (new file, +64 lines):

```cpp
#include <iostream>
#include <fmt/core.h>
#include <mllm/mllm.hpp>
#include <string>
#include "mllm/backends/qnn/aot_rt/QnnAOTRuntime.hpp"
#include "mllm/models/qwen3/configuration_qwen3.hpp"
#include "mllm/models/qwen3/tokenization_qwen3.hpp"

using mllm::Argparse;
using namespace mllm::qnn::aot;  // NOLINT

MLLM_MAIN({
  auto& help = Argparse::add<bool>("-h|--help").help("Show help message");
  auto& model_path = Argparse::add<std::string>("-m|--model").help("Model path").def("qwen3_qnn.mllm");
  auto& tokenizer_path = Argparse::add<std::string>("-t|--tokenizer").help("Tokenizer path").def("tokenizer.json");
  auto& config_path = Argparse::add<std::string>("-c|--config").help("Config path").required(true);
  auto& temperature = Argparse::add<float>("--temperature").help("Temperature").def(0.8f);
  auto& ar_len = Argparse::add<int>("--ar_len").help("Autoregressive length (chunk size)").def(128);

  Argparse::parse(argc, argv);

  mllm::initQnnBackend(model_path.get());

  if (help.isSet()) {
    Argparse::printHelp();
    return 0;
  }

  auto qwen3_cfg = mllm::models::qwen3::Qwen3Config(config_path.get());

  RunnerConfig config;
  config.model_path = model_path.get();
  config.temperature = temperature.get();
  config.num_layers = qwen3_cfg.num_hidden_layers;
  config.num_heads = qwen3_cfg.num_attention_heads;
  config.head_dim = qwen3_cfg.head_dim;
  config.vocab_size = qwen3_cfg.vocab_size;
  config.context_len = 1024;
  config.ar_len = ar_len.get();

  auto tokenizer = mllm::models::qwen3::Qwen3Tokenizer(tokenizer_path.get());

  std::string prompt_text;
  fmt::print("💬 Prompt text (or 'exit/quit'): ");
  std::getline(std::cin, prompt_text);

  auto input_tensor = tokenizer.convertMessage({.prompt = prompt_text});

  Runner runner(config, &tokenizer);
  if (!runner.load()) {
    std::cerr << "Failed to load model\n";
    return 1;
  }

  std::vector<uint64_t> prompt_tokens;
  auto sequence = input_tensor["sequence"];
  int64_t* ptr = sequence.ptr<int64_t>();
  for (int i = 0; i < sequence.shape()[1]; ++i) { prompt_tokens.push_back((uint64_t)ptr[i]); }

  runner.generate(prompt_tokens, config.context_len, [](const std::string& token) { std::cout << token << std::flush; });
  std::cout << "\n";

  return 0;
});
```
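With the defaults above, a minimal invocation is `./mllm-qwen3-aot-runner -c <config.json>`: only `--config` is required, while the model and tokenizer fall back to `qwen3_qnn.mllm` and `tokenizer.json`, and `--ar_len` sets the autoregressive chunk size (128 by default). One quirk worth noting: `initQnnBackend()` runs before the `--help` check, so even `--help` currently initializes the backend.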
mllm/backends/qnn/QNNBackend.cpp:

```diff
@@ -55,24 +55,6 @@ QNNBackend::QNNBackend() : Backend(kQNN, createQNNAllocator()) {
     MLLM_INFO("QNN backend supports early termination");
   }
 
-  bool contextStatus = false;
-  // check if the qnn_context.bin file exists
-  if (!std::filesystem::exists("qnn_context.bin")) {
-    contextStatus = runtime_->createContext(context_, nullptr);
-  } else {
-    contextStatus = runtime_->retrieveContext(context_, qnnModels_, nullptr);
-
-    // fill qnnModelIndexMap_ info according to qnnModels_
-    for (size_t i = 0; i < qnnModels_.size(); i++) {
-      auto graphName = qnnModels_[i]->getQnnGraphName();
-      qnnModelIndexMap_.insert(std::make_pair(graphName, i));
-    }
-  }
-  if (!contextStatus) { MLLM_ERROR_EXIT(1, "Failed to create QNN context"); }
-
-  // init QNN Allocator
-  static_pointer_cast<QNNAllocator>(allocator_)->setQNNPointer(runtime_->qnnInterface, context_);
-
   // set performance parameters for better performance on HTP
   perf_ = QNNPerf::create(&runtime_->qnnInterface);
   perf_->setPowerConfigBurst();
@@ -348,10 +330,10 @@ bool QNNRuntime::createContext(Qnn_ContextHandle_t& context, QnnContext_Config_t
   return true;
 }
 
-bool QNNRuntime::retrieveContext(Qnn_ContextHandle_t& context, std::vector<std::shared_ptr<QNNModel>>& qnnModels,
-                                 QnnContext_Config_t** contextConfig) {
+bool QNNRuntime::retrieveContext(const std::string& contextBinaryPath, Qnn_ContextHandle_t& context,
+                                 std::vector<std::shared_ptr<QNNModel>>& qnnModels, QnnContext_Config_t** contextConfig) {
   // Read the binary from qnn_context.bin and get the size in byte
-  std::ifstream file(QNN_Context_File, std::ios::binary | std::ios::ate);
+  std::ifstream file(contextBinaryPath, std::ios::binary | std::ios::ate);
   std::streamsize size = file.tellg();
   file.seekg(0, std::ios::beg);
@@ -436,6 +418,25 @@ bool QNNRuntime::retrieveContext(...)
   return true;
 }
 
+bool QNNBackend::createContext() {
+  if (!runtime_->createContext(context_, nullptr)) { return false; }
+  // init QNN Allocator
+  static_pointer_cast<QNNAllocator>(allocator_)->setQNNPointer(runtime_->qnnInterface, context_);
+  return true;
+}
+
+bool QNNBackend::loadContext(const std::string& contextPath) {
+  if (!runtime_->retrieveContext(contextPath, context_, qnnModels_, nullptr)) { return false; }
+  // fill qnnModelIndexMap_ info according to qnnModels_
+  for (size_t i = 0; i < qnnModels_.size(); i++) {
+    auto graphName = qnnModels_[i]->getQnnGraphName();
+    qnnModelIndexMap_.insert(std::make_pair(graphName, i));
+  }
+  // init QNN Allocator
+  static_pointer_cast<QNNAllocator>(allocator_)->setQNNPointer(runtime_->qnnInterface, context_);
+  return true;
+}
+
 std::shared_ptr<QNNModel> QNNBackend::createQnnGraph(const std::string& graphName) {
   // If the graph already exists, return the existing model
   if (qnnModelIndexMap_.find(graphName) != qnnModelIndexMap_.end()) {
@@ -535,8 +536,6 @@ void QNNBackend::graphExecute(const std::string& graphName, std::vector<Tensor>&
     return;
   }
 
-  // Prepare QNN input tensors by copying data from runtime inputs to graph input wrappers
-  // This handles the case where input tensor sizes may differ between prefill and decode phases
   std::vector<Qnn_Tensor_t> qnn_inputs;
   std::vector<Qnn_Tensor_t> qnn_outputs;
   for (int i = 0; i < model->getGraphInputTensorWrappers().size(); i++) {
@@ -550,52 +549,8 @@ void QNNBackend::graphExecute(const std::string& graphName, std::vector<Tensor>&
       return;
     }
 
-    if (wrapper_tensor.isNil()) {
-      MLLM_ERROR("Graph input wrapper {} for graph '{}' has no backing tensor", i, graphName);
-      return;
-    }
-
-    // Check for size mismatches (can occur in decode phase where inputs may be smaller)
-    size_t dst_bytes = wrapper_tensor.bytes();
-    size_t src_bytes = runtime_input.bytes();
-    if (dst_bytes != src_bytes) {
-      MLLM_WARN("Graph '{}' input tensor {} byte-size mismatch: wrapper={} bytes, runtime input={} bytes. Copying "
-                "min(dst, src), but this may truncate data.",
-                graphName, i, dst_bytes, src_bytes);
-    }
-
-    if (dst_bytes > 0) {
-      void* dst_ptr = wrapper_tensor.ptr<void>();
-      if (!dst_ptr) {
-        wrapper_tensor.alloc();
-        dst_ptr = wrapper_tensor.ptr<void>();
-      }
-
-      const void* src_ptr = runtime_input.ptr<void>();
-      size_t bytes_to_copy = std::min(dst_bytes, src_bytes);
-      if (!src_ptr) {
-        MLLM_ERROR("Runtime input tensor {} for graph '{}' has null data pointer", i, graphName);
-        return;
-      }
-      if (dst_ptr && src_ptr && dst_ptr != src_ptr) {
-        // Copy source data to destination buffer
-        // This ensures that the graph input wrapper has the correct data for execution
-        if (bytes_to_copy > 0) { std::memcpy(dst_ptr, src_ptr, bytes_to_copy); }
-
-        // If source is smaller than destination, zero out the remaining bytes
-        // This is important for decode phase where input tensors may be smaller than prefill
-        // For example, decode phase may use [1, 1] input while wrapper expects [1, 128]
-        // Note: In current implementation with full [1, 128] tensor, this should not trigger
-        // but it's kept as a safety measure for future optimizations
-        if (src_bytes < dst_bytes) {
-          size_t remaining_bytes = dst_bytes - src_bytes;
-          std::memset(static_cast<char*>(dst_ptr) + bytes_to_copy, 0, remaining_bytes);
-          // Only log if zero-padding actually occurs (unexpected case)
-          MLLM_WARN("[QNN graphExecute] Graph '{}' input tensor {}: zero-padded {} bytes (src={} bytes, dst={} bytes)",
-                    graphName, i, remaining_bytes, src_bytes, dst_bytes);
-        }
-      }
-    }
+    // input wrapper is empty, set wrapper's dataContainer(mllm::Tensor)
+    if (!wrapper->isAlloc()) { wrapper->__setDataContainer(runtime_input); }
```
Contributor comment on lines +552 to +553:

🧩 Analysis chain: scripts run against the repository to verify the wrapper lifecycle:

```bash
# Where are __setDataContainer and isAlloc used?
rg -n -C3 '__setDataContainer|isAlloc' --type=cpp

# How are input wrappers created, stored, and reused across executions?
rg -n 'getGraphInputTensorWrappers' mllm/backends/qnn/ --type=cpp
sed -n '180,230p' mllm/backends/qnn/QNNUtils.hpp        # QNNTensorWrapper definition
rg -n 'QNNTensorWrapper::create' mllm/backends/qnn/QNNUtils.cpp -A15
sed -n '484,496p' mllm/backends/qnn/QNNUtils.cpp        # alloc() implementation

# Is isAlloc_ ever reset once set?
rg -n 'isAlloc_\s*=' mllm/backends/qnn/ --type=cpp
rg -n 'isAlloc_\s*=\s*false' mllm/backends/qnn/
```

The scripts confirm the following execution flow.

First execution:
1. `!wrapper->isAlloc()` is true (initial state: `isAlloc_ = false`).
2. `__setDataContainer(runtime_input)` binds the wrapper's `dataContainer_` to the runtime input.
3. `wrapper->alloc()` sets `isAlloc_ = true`.

Second and later executions:
1. `!wrapper->isAlloc()` is false (already allocated).
2. `__setDataContainer()` is skipped, so `dataContainer_` is never updated.
3. `alloc()` runs again, but the wrapper still holds the previous data.

`isAlloc_` is never reset to `false` anywhere in the backend.

Fix stale-data bug when wrappers are reused across multiple graph executions. The current logic skips the data update whenever `isAlloc()` is already true, so every execution after the first can run on the previous call's input. To fix this, consider either:

- calling `__setDataContainer(runtime_input)` unconditionally and letting `isAlloc()` guard only the one-time QNN buffer registration, or
- resetting the wrapper's allocation state between executions so the rebind path is taken again.
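A minimal sketch of the first option, reusing only names that already appear in the analysis above (`__setDataContainer`, `isAlloc`, `alloc`); the exact split of responsibilities is an assumption, not the confirmed API contract:

```cpp
// Rebind the wrapper's backing tensor on every execution so a reused
// wrapper never runs with the previous iteration's data.
wrapper->__setDataContainer(runtime_input);

// Register the buffer with QNN only once; here isAlloc() guards the
// one-time registration rather than the per-run data update (assumed).
if (!wrapper->isAlloc()) { wrapper->alloc(); }
```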
The diff continues:

```diff
     // Allocate and register the wrapper tensor with QNN allocator
     // QNNAllocator will handle registered memory descriptor when needed
@@ -617,74 +572,18 @@ void QNNBackend::graphExecute(const std::string& graphName, std::vector<Tensor>&
 
   if (ProfilingLevel::OFF != profilingLevel_) { extractBackendProfilingInfo(runtime_->profileHandle); }
 
-  // Debug: Print last output shape from QNN actual return order (before reordering)
-  // Uncomment below for debugging output order issues
-  // if (!qnn_output_tensors.empty()) {
-  //   const auto& last_output = qnn_output_tensors.back();
-  //   const auto& output_wrappers = model->getGraphOutputTensorWrappers();
-  //   const auto& last_wrapper = output_wrappers.back();
-  //   MLLM_INFO("[QNN Actual Return Order] Last output tensor '{}' shape: {}",
-  //             last_wrapper->getName(), last_output.shape());
-  // }
-
   // Reorder outputs according to MLLM expected order
   const auto& expectedOrder = model->getExpectedOutputOrder();
 
   // Resize outputs to match QNN output count first
   outputs.resize(qnn_output_tensors.size());  // Ensure outputs has enough space for all QNN outputs
   if (!expectedOrder.empty() && expectedOrder.size() == qnn_output_tensors.size()) {
-    // Debug: Log output order information
-    // Uncomment below for debugging output order issues
-    // MLLM_INFO("QNNBackend::graphExecute: Checking output order for graph '{}'", graphName);
-    // MLLM_INFO("  MLLM Expected Output Order ({} outputs):", expectedOrder.size());
-    // for (size_t i = 0; i < expectedOrder.size(); i++) {
-    //   MLLM_INFO("    [{}] {}", i, expectedOrder[i]);
-    // }
-    // MLLM_INFO("  QNN Output Order ({} outputs):", model->getGraphOutputTensorWrappers().size());
-    // for (size_t i = 0; i < model->getGraphOutputTensorWrappers().size(); i++) {
-    //   auto wrapper = model->getGraphOutputTensorWrappers()[i];
-    //   MLLM_INFO("    [{}] {}", i, wrapper->getName());
-    // }
-
-    // Check if reordering is needed
-    // bool needs_reordering = false;
-    // std::vector<std::pair<size_t, int>> mismatches;
-    // for (size_t i = 0; i < expectedOrder.size(); i++) {
-    //   const std::string& expected_name = expectedOrder[i];
-    //   int qnn_index = model->getQnnOutputIndex(expected_name);
-    //   if (qnn_index >= 0 && qnn_index < static_cast<int>(qnn_output_tensors.size())) {
-    //     if (static_cast<int>(i) != qnn_index) {
-    //       needs_reordering = true;
-    //       mismatches.emplace_back(i, qnn_index);
-    //     }
-    //   }
-    // }
-
-    // Debug: Verification messages
-    // Uncomment below for debugging output order issues
-    // if (needs_reordering) {
-    //   MLLM_INFO("  [VERIFICATION] QNN output order DIFFERS from MLLM expected order - REORDERING REQUIRED");
-    //   for (const auto& [mllm_idx, qnn_idx] : mismatches) {
-    //     MLLM_INFO("    Mismatch: MLLM[{}] expects '{}' but it's at QNN[{}]",
-    //               mllm_idx, expectedOrder[mllm_idx], qnn_idx);
-    //   }
-    // } else {
-    //   MLLM_INFO("  [VERIFICATION] QNN output order MATCHES MLLM expected order - no reordering needed");
-    // }
-
     // Reorder outputs according to expected order
     for (size_t i = 0; i < expectedOrder.size(); i++) {
       const std::string& expected_name = expectedOrder[i];
       int qnn_index = model->getQnnOutputIndex(expected_name);
       if (qnn_index >= 0 && qnn_index < static_cast<int>(qnn_output_tensors.size())) {
         outputs[i] = qnn_output_tensors[qnn_index];
-        // Debug: Mapping information
-        // Uncomment below for debugging output order issues
-        // if (static_cast<int>(i) != qnn_index) {
-        //   MLLM_INFO("    Mapping: MLLM[{}] = QNN[{}] (tensor: {}) [REORDERED]", i, qnn_index, expected_name);
-        // } else {
-        //   MLLM_INFO("    Mapping: MLLM[{}] = QNN[{}] (tensor: {}) [SAME]", i, qnn_index, expected_name);
-        // }
       } else {
         MLLM_ERROR("QNNBackend::graphExecute: Failed to find QNN output index for tensor '{}' in graph '{}'", expected_name,
                    graphName);
```
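Taken together, the constructor hunk and the new `createContext()`/`loadContext()` methods move the create-vs-restore decision out of `QNNBackend`'s constructor and hand it to the caller. A minimal caller-side sketch, assuming the same `qnn_context.bin` convention the old constructor hard-coded (the helper itself is hypothetical):

```cpp
#include <filesystem>
#include <string>

// Hypothetical init helper: reproduces the decision the constructor used
// to make internally, now expressed against the new public API.
bool setupQnnContext(QNNBackend& backend, const std::string& ctxBin = "qnn_context.bin") {
  if (std::filesystem::exists(ctxBin)) {
    // AOT path: restore graphs from a previously compiled context binary.
    return backend.loadContext(ctxBin);
  }
  // Build-from-scratch path: start with an empty context and create graphs at runtime.
  return backend.createContext();
}
```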
mllm/backends/qnn/QNNBackend.hpp:

```diff
@@ -50,8 +50,8 @@ class QNNRuntime {
   }
 
   bool createContext(Qnn_ContextHandle_t& context, QnnContext_Config_t** contextConfig = nullptr);
-  bool retrieveContext(Qnn_ContextHandle_t& context, std::vector<std::shared_ptr<QNNModel>>& qnnModels,
-                       QnnContext_Config_t** contextConfig = nullptr);
+  bool retrieveContext(const std::string& contextBinaryPath, Qnn_ContextHandle_t& context,
+                       std::vector<std::shared_ptr<QNNModel>>& qnnModels, QnnContext_Config_t** contextConfig = nullptr);
```
Contributor comment on lines +53 to +54:

🧩 Analysis chain: scripts run to verify the signature change and its callers:

```bash
# Confirm the new signature and locate every caller of retrieveContext
rg -n "retrieveContext" --type=cpp -C 3

# Inspect the declaration site and look for existing docstrings
sed -n '40,60p' mllm/backends/qnn/QNNBackend.hpp
rg -n "///|//!" mllm/backends/qnn/QNNBackend.hpp
```

The search finds no callers of the old signature outside `mllm/backends/qnn/QNNBackend.cpp`, so the change itself is safe, but it also finds no `///` or `//!` documentation anywhere in `QNNBackend.hpp`.

Add docstrings to public API methods. The reworked `retrieveContext` and the new public `QNNBackend::loadContext()` / `createContext()` entry points ship undocumented; brief comments covering parameters, failure behavior, and who owns the resulting context would make the API easier to use correctly.
The diff continues:

```diff
 
 private:
   QNN_INTERFACE_VER_TYPE qnnInterface;
 
@@ -87,6 +87,9 @@ class QNNBackend final : public Backend {
 public:
   QNNBackend();
 
+  bool loadContext(const std::string& contextPath);
+  bool createContext();
+
   bool isWeightOnDevice() override { return false; }
 
   // QNN Graph build interfaces
```