diff --git a/backends/arm/runtime/EthosUBackend.cpp b/backends/arm/runtime/EthosUBackend.cpp index 51ebb3742e1..cb46f7d32c3 100644 --- a/backends/arm/runtime/EthosUBackend.cpp +++ b/backends/arm/runtime/EthosUBackend.cpp @@ -84,6 +84,8 @@ typedef struct { extern "C" { void __attribute__((weak)) EthosUBackend_execute_begin() {} void __attribute__((weak)) EthosUBackend_execute_end() {} +__attribute__((weak)) unsigned char* ethosu_fast_scratch = nullptr; +__attribute__((weak)) size_t ethosu_fast_scratch_size = 0; } class EthosUBackendExecuteCallbacks { @@ -198,8 +200,8 @@ class EthosUBackend final : public ::executorch::runtime::BackendInterface { handles.weight_data_size, ethosu_scratch, handles.scratch_data_size, - nullptr, - 0); + ethosu_fast_scratch, + ethosu_fast_scratch_size); // Write argument values (from EValue tensor) into Ethos-U scratch // TODO(MLETORCH-123): Optimise into direct write from Vela into the SRAM @@ -309,9 +311,12 @@ class EthosUBackend final : public ::executorch::runtime::BackendInterface { static_cast<uint64_t>( reinterpret_cast<uintptr_t>((handles.weight_data))), static_cast<uint64_t>(reinterpret_cast<uintptr_t>(ethosu_scratch)), - 0}; + static_cast<uint64_t>( + reinterpret_cast<uintptr_t>(ethosu_fast_scratch))}; size_t bases_size[ETHOSU_NUM_BASE_ADDRS] = { - handles.weight_data_size, handles.scratch_data_size, 0}; + handles.weight_data_size, + handles.scratch_data_size, + ethosu_fast_scratch_size}; int result = 0; EXECUTORCH_PROF_START( event_tracer, event_tracer_local_scope, "+EthosUBackend::execute()NPU"); @@ -321,7 +326,7 @@ class EthosUBackend final : public ::executorch::runtime::BackendInterface { handles.cmd_data_size, bases, bases_size, - 3, /* fixed array of pointers to binary interface*/ + ETHOSU_NUM_BASE_ADDRS, /* fixed array of pointers to binary interface*/ nullptr); EXECUTORCH_PROF_END(event_tracer, event_tracer_local_scope); diff --git a/backends/arm/scripts/build_executor_runner.sh b/backends/arm/scripts/build_executor_runner.sh index 807821d427f..9e2f3954c53 100755 --- 
a/backends/arm/scripts/build_executor_runner.sh +++ b/backends/arm/scripts/build_executor_runner.sh @@ -103,7 +103,7 @@ then memory_mode="Shared_Sram" if [[ ${target} =~ "ethos-u85" ]] then - memory_mode="Sram_Only" + memory_mode="Dedicated_Sram_384KB" fi fi diff --git a/backends/arm/test/test_arm_baremetal.sh b/backends/arm/test/test_arm_baremetal.sh index b0480792fdb..ac3d81d21a1 100755 --- a/backends/arm/test/test_arm_baremetal.sh +++ b/backends/arm/test/test_arm_baremetal.sh @@ -211,10 +211,7 @@ test_models_ethos-u85() { # End to End model tests using model_test.py python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u85-512 --model=mv3 --extra_flags="-DET_ATOL=5.00 -DET_RTOL=5.00" python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u85-128 --model=lstm --extra_flags="-DET_ATOL=0.03 -DET_RTOL=0.03" python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u85-128 --model=w2l --extra_flags="-DET_ATOL=0.01 -DET_RTOL=0.01" - # Temporarily not test inception_v4 on Ethos-U85. To support inception_v4 properly on Ethos-U85, we need to run the model in Dedicated_Sram memory mode with - # 384KB(or another amount lower than 2MB) of SRAM passed as fast scratch area. 
The PR adding support for Dedicated_Sram(https://github.com/pytorch/executorch/pull/10714) - # was reverted due to a change required in an internal variant of the examples/arm/executor_runner/arm_executor_runner.cpp - # python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u85-256 --model=ic4 --extra_flags="-DET_ATOL=0.8 -DET_RTOL=0.8" --timeout=2400 + python3 backends/arm/test/test_model.py --test_output=arm_test/test_model --target=ethos-u85-256 --model=ic4 --extra_flags="-DET_ATOL=0.8 -DET_RTOL=0.8" --timeout=2400 echo "${TEST_SUITE_NAME}: PASS" } diff --git a/backends/arm/test/test_model.py b/backends/arm/test/test_model.py index b0fd2f2a381..072583ef862 100755 --- a/backends/arm/test/test_model.py +++ b/backends/arm/test/test_model.py @@ -81,7 +81,7 @@ def get_args(): if "u55" in args.target: args.memory_mode = "Shared_Sram" elif "u85" in args.target: - args.memory_mode = "Sram_Only" + args.memory_mode = "Dedicated_Sram_384KB" else: raise RuntimeError(f"Invalid target name {args.target}") diff --git a/examples/arm/executor_runner/arm_executor_runner.cpp b/examples/arm/executor_runner/arm_executor_runner.cpp index e5313345f6c..debc955dcc0 100644 --- a/examples/arm/executor_runner/arm_executor_runner.cpp +++ b/examples/arm/executor_runner/arm_executor_runner.cpp @@ -145,23 +145,15 @@ const size_t temp_allocation_pool_size = unsigned char __attribute__(( section(".bss.tensor_arena"), aligned(16))) temp_allocation_pool[temp_allocation_pool_size]; - -namespace executorch { -namespace backends { -namespace arm { #if defined(ET_ARM_BAREMETAL_FAST_SCRATCH_TEMP_ALLOCATOR_POOL_SIZE) +extern "C" { size_t ethosu_fast_scratch_size = ET_ARM_BAREMETAL_FAST_SCRATCH_TEMP_ALLOCATOR_POOL_SIZE; unsigned char __attribute__((section(".bss.ethosu_scratch"), aligned(16))) dedicated_sram[ET_ARM_BAREMETAL_FAST_SCRATCH_TEMP_ALLOCATOR_POOL_SIZE]; unsigned char* ethosu_fast_scratch = dedicated_sram; -#else -size_t ethosu_fast_scratch_size = 0; -unsigned 
char* ethosu_fast_scratch = nullptr; +} #endif -} // namespace arm -} // namespace backends -} // namespace executorch void et_pal_init(void) { // Enable ARM PMU Clock diff --git a/examples/arm/run.sh b/examples/arm/run.sh index 89ac5cd30a8..750c251596c 100755 --- a/examples/arm/run.sh +++ b/examples/arm/run.sh @@ -110,7 +110,7 @@ then memory_mode="Shared_Sram" if [[ ${target} =~ "ethos-u85" ]] then - memory_mode="Sram_Only" + memory_mode="Dedicated_Sram_384KB" fi fi