From a36200411180d5d8b9501c90937629117317d0bb Mon Sep 17 00:00:00 2001 From: Sergey Smirnov Date: Sun, 12 Sep 2021 11:25:30 -0700 Subject: [PATCH 01/22] [microTVM] Update support for ARMv7m intrinsic - Improved implementation of gemm function for conv2d - Removed %4 restriction for channels - Added test case to verify SMLAD intrinsic speed acceleration Signed-off-by: Sergey Smirnov --- .../zephyr/base-box/base_box_test.sh | 2 + python/tvm/relay/op/strategy/arm_cpu.py | 3 +- .../arm_cpu/cortex_m7/conv2d/direct_simd.py | 10 +- .../arm_cpu/cortex_m7/micro_kernel/gemm.py | 326 ++++++++++++++++-- tests/micro/testdata/armv7m/digit-2.jpg | Bin 0 -> 572 bytes .../testdata/armv7m/mnist_model_quant.tflite | Bin 0 -> 29888 bytes tests/micro/zephyr/test_zephyr_armv7m.py | 293 ++++++++++++++++ 7 files changed, 603 insertions(+), 31 deletions(-) create mode 100644 tests/micro/testdata/armv7m/digit-2.jpg create mode 100644 tests/micro/testdata/armv7m/mnist_model_quant.tflite create mode 100644 tests/micro/zephyr/test_zephyr_armv7m.py diff --git a/apps/microtvm/reference-vm/zephyr/base-box/base_box_test.sh b/apps/microtvm/reference-vm/zephyr/base-box/base_box_test.sh index 2a023b520b01..fd8bb7a2c24a 100755 --- a/apps/microtvm/reference-vm/zephyr/base-box/base_box_test.sh +++ b/apps/microtvm/reference-vm/zephyr/base-box/base_box_test.sh @@ -37,3 +37,5 @@ if [ $board == "stm32f746xx" ]; then else pytest tests/micro/zephyr/test_zephyr_aot.py --zephyr-board=${board} fi + +pytest tests/micro/zephyr/test_zephyr_armv7m.py --zephyr-board=${board} \ No newline at end of file diff --git a/python/tvm/relay/op/strategy/arm_cpu.py b/python/tvm/relay/op/strategy/arm_cpu.py index 005eae68b8b7..f3d0c1af0a85 100644 --- a/python/tvm/relay/op/strategy/arm_cpu.py +++ b/python/tvm/relay/op/strategy/arm_cpu.py @@ -127,8 +127,7 @@ def conv2d_strategy_arm_cpu(attrs, inputs, out_type, target): name="conv2d_hwcn.generic", ) elif layout == "NHWC": - channels = data.shape[3] - if "SMLAD" in isa and (channels 
% 4) == 0 and kernel_layout == "HWOI": + if "SMLAD" in isa and kernel_layout == "HWOI": strategy.add_implementation( wrap_compute_conv2d(topi.arm_cpu.conv2d_direct_simd), wrap_topi_schedule(topi.arm_cpu.schedule_conv2d_direct_simd), diff --git a/python/tvm/topi/arm_cpu/cortex_m7/conv2d/direct_simd.py b/python/tvm/topi/arm_cpu/cortex_m7/conv2d/direct_simd.py index 988c3a99c059..1842aeca431b 100644 --- a/python/tvm/topi/arm_cpu/cortex_m7/conv2d/direct_simd.py +++ b/python/tvm/topi/arm_cpu/cortex_m7/conv2d/direct_simd.py @@ -112,10 +112,11 @@ def conv2d_direct_simd_compute(cfg, data, kernel, strides, padding, dilation, ou cfg.reduce_axis(in_channels.value), ) - assert in_channels.value % 4 == 0 owo, owi = cfg.define_split("tile_ow", ow, policy="factors", num_outputs=2) cio, cii = cfg.define_split( - "tile_ci", ci, policy="factors", num_outputs=2, filter=lambda x: x.size[-1] % 4 == 0 + "tile_ci", ci, policy="factors", num_outputs=2, + # TODO: check case with in_channels.value % 4 != 0 with AutoTVM + filter=None if cfg.is_fallback else lambda x: x.size[-1] % 4 == 0 ) coo, coi = cfg.define_split("tile_co", co, policy="factors", num_outputs=2) @@ -134,6 +135,11 @@ def conv2d_direct_simd_compute(cfg, data, kernel, strides, padding, dilation, ou cfg.define_knob("auto_unroll_max_step", [0, 2, 4, 8, 16, 32]) cfg.define_knob("unroll_explicit", [0, 1]) + if cfg.is_fallback: + cfg.fallback_split("tile_ow", [-1, out_width.value]) + cfg.fallback_split("tile_ci", [-1, in_channels.value]) + cfg.fallback_split("tile_co", [-1, out_channels.value]) + return conv diff --git a/python/tvm/topi/arm_cpu/cortex_m7/micro_kernel/gemm.py b/python/tvm/topi/arm_cpu/cortex_m7/micro_kernel/gemm.py index fb6f7a589525..fbfc3e65bb39 100644 --- a/python/tvm/topi/arm_cpu/cortex_m7/micro_kernel/gemm.py +++ b/python/tvm/topi/arm_cpu/cortex_m7/micro_kernel/gemm.py @@ -46,9 +46,8 @@ def intrin_gemm_MxKxN(M, K, N, in_dtype, out_dtype): K = K.value if isinstance(N, tvm.tir.IntImm): N = N.value - assert K % 4 
== 0 # TODO(weberlo, areusch): support more dtypes? - assert in_dtype == "int8" + assert in_dtype == "int8" or in_dtype == "int16" assert out_dtype == "int32" A = te.placeholder((M, K), name="a", dtype=in_dtype) B = te.placeholder((N, K), name="b", dtype=in_dtype) @@ -71,13 +70,14 @@ def intrin_gemm_MxKxN(M, K, N, in_dtype, out_dtype): def intrin_func(ins, outs): aa, bb = ins cc = outs[0] + gemm_func_prefix = "gemm" if in_dtype == "int8" else "gemm16" def _reduce_update(): ib = tvm.tir.ir_builder.create() ib.emit( tvm.tir.call_extern( "int32", - f"gemm_{M}x{K}x{N}_update_{uniq_id}", + f"{gemm_func_prefix}_{M}x{K}x{N}_update_{uniq_id}", aa.access_ptr("r"), bb.access_ptr("r"), cc.access_ptr("w"), @@ -102,7 +102,7 @@ def _body(): ib.emit( tvm.tir.call_extern( "int32", - f"gemm_{M}x{K}x{N}_body_{uniq_id}", + f"{gemm_func_prefix}_{M}x{K}x{N}_body_{uniq_id}", aa.access_ptr("r"), bb.access_ptr("r"), cc.access_ptr("w"), @@ -132,32 +132,270 @@ def gemm_MxKxN_impl(M, K, N, uniq_id): #include #include +#ifdef __cplusplus +extern "C" +#endif +__STATIC_FORCEINLINE int32_t gemm_{M}x{N}_body_rest_{uniq_id}( + int K, + int8_t *aa, int8_t *bb, int32_t *cc, + int A_stride, int B_stride, int C_stride) {{ + int k_base = (K / 4) * 4; + switch ( K % 4 ) {{ + case 1: + for (int i = 0; i < {M}; i++) {{ + for (int j = 0; j < {N}; j++) {{ + int8_t *a_ptr = &aa[i * A_stride + k_base]; + int8_t *b_ptr = &bb[j * B_stride + k_base]; + cc[i * C_stride + j] = (int32_t) a_ptr[0] * (int32_t) b_ptr[0]; + }} + }} + break; + case 2: + for (int i = 0; i < {M}; i++) {{ + for (int j = 0; j < {N}; j++) {{ + int8_t *a_ptr = &aa[i * A_stride + k_base]; + int8_t *b_ptr = &bb[j * B_stride + k_base]; + cc[i * C_stride + j] = (int32_t) a_ptr[0] * (int32_t) b_ptr[0] + + (int32_t) a_ptr[1] * (int32_t) b_ptr[1]; + }} + }} + break; + case 3: + for (int i = 0; i < {M}; i++) {{ + for (int j = 0; j < {N}; j++) {{ + int8_t *a_ptr = &aa[i * A_stride + k_base]; + int8_t *b_ptr = &bb[j * B_stride + k_base]; + cc[i * 
C_stride + j] = (int32_t) a_ptr[0] * (int32_t) b_ptr[0] + + (int32_t) a_ptr[1] * (int32_t) b_ptr[1] + + (int32_t) a_ptr[2] * (int32_t) b_ptr[2]; + }} + }} + break; + }} + return 0; +}} + +#ifdef __cplusplus +extern "C" +#endif +__STATIC_FORCEINLINE int32_t gemm_{M}x{K}x{N}_body_loop_{uniq_id}( + int8_t *aa, int8_t *bb, int32_t *cc, + int A_stride, int B_stride, int C_stride) {{ + for (int i = 0; i < {M}; i++) {{ + for (int j = 0; j < {N}; j++) {{ + int32_t sum = 0; + for (int l = 0; l < {K}; l++) {{ + sum += (int32_t) aa[i*A_stride + l] * (int32_t) bb[j*B_stride + l]; + }} + // NOTE: this is the line where `*_body` differs from `*_update`. here + // we're *setting* the result, instead of accumulating, because we know + // the `i` and `j` itervars span their entire respective axes. + cc[i*C_stride + j] = sum; + }} + }} + return 0; +}} + +#ifdef __cplusplus +extern "C" +#endif __STATIC_FORCEINLINE int32_t gemm_{M}x{K}x{N}_body_{uniq_id}( int8_t *aa, int8_t *bb, int32_t *cc, int A_stride, int B_stride, int C_stride) {{ - int16_t aa_pad[{aa_pad_size}]; int16_t bb_pad[{bb_pad_size}]; + if ( {M} < 16 || {N} < 16 ) + return gemm_{M}x{K}x{N}_body_loop_{uniq_id}(aa, bb, cc, A_stride, B_stride, C_stride); + + for (int i = 0; i < {N}; i++) + for (int j = 0; j < {K} / 4; j++) + read_and_pad(&bb[i*B_stride + j*4], (int32_t*) &bb_pad[i*{K} + j*4], (int32_t*) &bb_pad[i*{K} + j*4 + 2]); + for (int i = 0; i < {M}; i++) {{ - for (int j = 0; j < {K} / 4; j++) {{ - read_and_pad(&aa[i*A_stride + j*4], (int32_t*) &aa_pad[i*{K} + j*4], (int32_t*) &aa_pad[i*{K} + j*4 + 2]); + int16_t aa_pad_line[{K}]; + for (int l = 0; l < {K} / 4; l++) + read_and_pad(&aa[i*A_stride + l*4], (int32_t*) &aa_pad_line[l*4], (int32_t*) &aa_pad_line[l*4 + 2]); + + for (int j = 0; j < {N}; j++) {{ + int32_t *aa_ptr = (int32_t *) aa_pad_line; + int32_t *bb_ptr = (int32_t *) &bb_pad[j*{K}]; + int32_t sum = 0; + for (int l = 0; l < 2 * ({K} / 4); l++) {{ + sum = __SMLAD(*aa_ptr, *bb_ptr, sum); + ++ aa_ptr; ++ 
bb_ptr; + }} + // NOTE: this is the line where `*_body` differs from `*_update`. here + // we're *setting* the result, instead of accumulating, because we know + // the `i` and `j` itervars span their entire respective axes. + cc[i*C_stride + j] = sum; }} }} - for (int i = 0; i < {N}; i++) {{ - for (int j = 0; j < {K} / 4; j++) {{ + if ( {K} % 4 != 0 ) + gemm_{M}x{N}_body_rest_{uniq_id}({K}, aa, bb, cc, A_stride, B_stride, C_stride); + + return 0; +}} + + +#ifdef __cplusplus +extern "C" +#endif +__STATIC_FORCEINLINE int32_t gemm_{M}x{N}_update_rest_{uniq_id}( + int K, + int8_t *aa, int8_t *bb, int32_t *cc, + int A_stride, int B_stride, int C_stride) {{ + int k_base = (K / 4) * 4; + switch ( K % 4 ) {{ + case 1: + for (int i = 0; i < {M}; i++) {{ + for (int j = 0; j < {N}; j++) {{ + int8_t *a_ptr = &aa[i * A_stride + k_base]; + int8_t *b_ptr = &bb[j * B_stride + k_base]; + cc[i * C_stride + j] += (int32_t) a_ptr[0] * (int32_t) b_ptr[0]; + }} + }} + break; + case 2: + for (int i = 0; i < {M}; i++) {{ + for (int j = 0; j < {N}; j++) {{ + int8_t *a_ptr = &aa[i * A_stride + k_base]; + int8_t *b_ptr = &bb[j * B_stride + k_base]; + cc[i * C_stride + j] += (int32_t) a_ptr[0] * (int32_t) b_ptr[0] + + (int32_t) a_ptr[1] * (int32_t) b_ptr[1]; + }} + }} + break; + case 3: + for (int i = 0; i < {M}; i++) {{ + for (int j = 0; j < {N}; j++) {{ + int8_t *a_ptr = &aa[i * A_stride + k_base]; + int8_t *b_ptr = &bb[j * B_stride + k_base]; + cc[i * C_stride + j] += (int32_t) a_ptr[0] * (int32_t) b_ptr[0] + + (int32_t) a_ptr[1] * (int32_t) b_ptr[1] + + (int32_t) a_ptr[2] * (int32_t) b_ptr[2]; + }} + }} + break; + }} + return 0; +}} + +#ifdef __cplusplus +extern "C" +#endif +__STATIC_FORCEINLINE int32_t gemm_{M}x{K}x{N}_update_loop_{uniq_id}( + int8_t *aa, int8_t *bb, int32_t *cc, + int A_stride, int B_stride, int C_stride) {{ + for (int i = 0; i < {M}; i++) {{ + for (int j = 0; j < {N}; j++) {{ + int32_t sum = 0; + for (int l = 0; l < {K}; l++) {{ + sum += (int32_t) aa[i*A_stride + l] * 
(int32_t) bb[j*B_stride + l]; + }} + cc[i*C_stride + j] += sum; + }} + }} + return 0; +}} + +#ifdef __cplusplus +extern "C" +#endif +__STATIC_FORCEINLINE int32_t gemm_{M}x{K}x{N}_update_{uniq_id}( + int8_t *aa, int8_t *bb, int32_t *cc, + int A_stride, int B_stride, int C_stride) {{ + int16_t bb_pad[{bb_pad_size}]; + + if ( {M} < 16 || {N} < 16 ) + return gemm_{M}x{K}x{N}_update_loop_{uniq_id}(aa, bb, cc, A_stride, B_stride, C_stride); + + for (int i = 0; i < {N}; i++) + for (int j = 0; j < {K} / 4; j++) read_and_pad(&bb[i*B_stride + j*4], (int32_t*) &bb_pad[i*{K} + j*4], (int32_t*) &bb_pad[i*{K} + j*4 + 2]); + + for (int i = 0; i < {M}; i++) {{ + int16_t aa_pad_line[{K}]; + for (int l = 0; l < {K} / 4; l++) + read_and_pad(&aa[i*A_stride + l*4], (int32_t*) &aa_pad_line[l*4], (int32_t*) &aa_pad_line[l*4 + 2]); + + for (int j = 0; j < {N}; j++) {{ + int32_t *aa_ptr = (int32_t *) aa_pad_line; + int32_t *bb_ptr = (int32_t *) &bb_pad[j*{K}]; + int32_t sum = 0; + for (int l = 0; l < 2 * ({K} / 4); l++) {{ + sum = __SMLAD(*aa_ptr, *bb_ptr, sum); + ++ aa_ptr; ++ bb_ptr; + }} + cc[i*C_stride + j] += sum; + }} + }} + + if ( {K} % 4 != 0 ) + gemm_{M}x{N}_update_rest_{uniq_id}({K}, aa, bb, cc, A_stride, B_stride, C_stride); + + return 0; +}} + + + +#ifdef __cplusplus +extern "C" +#endif +__STATIC_FORCEINLINE int32_t gemm16_{M}x{N}_body_rest_{uniq_id}( + int K, + int16_t *aa, int16_t *bb, int32_t *cc, + int A_stride, int B_stride, int C_stride) {{ + int k_base = (K / 2) * 2; + for (int i = 0; i < {M}; i++) {{ + for (int j = 0; j < {N}; j++) {{ + int16_t *a_ptr = &aa[i * A_stride + k_base]; + int16_t *b_ptr = &bb[j * B_stride + k_base]; + cc[i * C_stride + j] = (int32_t) a_ptr[0] * (int32_t) b_ptr[0]; + }} + }} + return 0; +}} + +#ifdef __cplusplus +extern "C" +#endif +__STATIC_FORCEINLINE int32_t gemm16_{M}x{K}x{N}_body_loop_{uniq_id}( + int16_t *aa, int16_t *bb, int32_t *cc, + int A_stride, int B_stride, int C_stride) {{ + for (int i = 0; i < {M}; i++) {{ + for (int j = 0; j < 
{N}; j++) {{ + int32_t sum = 0; + for (int l = 0; l < {K}; l++) {{ + sum += (int32_t) aa[i*A_stride + l] * (int32_t) bb[j*B_stride + l]; + }} + // NOTE: this is the line where `*_body` differs from `*_update`. here + // we're *setting* the result, instead of accumulating, because we know + // the `i` and `j` itervars span their entire respective axes. + cc[i*C_stride + j] = sum; }} }} + return 0; +}} + +#ifdef __cplusplus +extern "C" +#endif +__STATIC_FORCEINLINE int32_t gemm16_{M}x{K}x{N}_body_{uniq_id}( + int16_t *aa, int16_t *bb, int32_t *cc, + int A_stride, int B_stride, int C_stride) {{ + if ( {M} < 2 || {N} < 2 ) + return gemm16_{M}x{K}x{N}_body_loop_{uniq_id}(aa, bb, cc, A_stride, B_stride, C_stride); for (int i = 0; i < {M}; i++) {{ for (int j = 0; j < {N}; j++) {{ + int32_t *aa_ptr = (int32_t *) &aa[i*A_stride]; + int32_t *bb_ptr = (int32_t *) &bb[j*B_stride]; + int32_t sum = 0; for (int l = 0; l < {K} / 2; l++) {{ - sum = __SMLAD( - *((int32_t*) &aa_pad[i*{K} + l*2]), - *((int32_t*) &bb_pad[j*{K} + l*2]), - sum); + sum = __SMLAD(*aa_ptr, *bb_ptr, sum); + ++ aa_ptr; ++ bb_ptr; }} // NOTE: this is the line where `*_body` differs from `*_update`. 
here // we're *setting* the result, instead of accumulating, because we know @@ -166,46 +404,80 @@ def gemm_MxKxN_impl(M, K, N, uniq_id): }} }} + if ( {K} % 2 != 0 ) + gemm16_{M}x{N}_body_rest_{uniq_id}({K}, aa, bb, cc, A_stride, B_stride, C_stride); + return 0; }} + #ifdef __cplusplus extern "C" #endif -__STATIC_FORCEINLINE int32_t gemm_{M}x{K}x{N}_update_{uniq_id}( - int8_t *aa, int8_t *bb, int32_t *cc, +__STATIC_FORCEINLINE int32_t gemm16_{M}x{N}_update_rest_{uniq_id}( + int K, + int16_t *aa, int16_t *bb, int32_t *cc, int A_stride, int B_stride, int C_stride) {{ - int16_t aa_pad[{aa_pad_size}]; - int16_t bb_pad[{bb_pad_size}]; - + int k_base = (K / 2) * 2; for (int i = 0; i < {M}; i++) {{ - for (int j = 0; j < {K} / 4; j++) {{ - read_and_pad(&aa[i*A_stride + j*4], (int32_t*) &aa_pad[i*{K} + j*4], (int32_t*) &aa_pad[i*{K} + j*4 + 2]); + for (int j = 0; j < {N}; j++) {{ + int16_t *a_ptr = &aa[i * A_stride + k_base]; + int16_t *b_ptr = &bb[j * B_stride + k_base]; + cc[i * C_stride + j] += (int32_t) a_ptr[0] * (int32_t) b_ptr[0]; }} }} + return 0; +}} - for (int i = 0; i < {N}; i++) {{ - for (int j = 0; j < {K} / 4; j++) {{ - read_and_pad(&bb[i*B_stride + j*4], (int32_t*) &bb_pad[i*{K} + j*4], (int32_t*) &bb_pad[i*{K} + j*4 + 2]); +#ifdef __cplusplus +extern "C" +#endif +__STATIC_FORCEINLINE int32_t gemm16_{M}x{K}x{N}_update_loop_{uniq_id}( + int16_t *aa, int16_t *bb, int32_t *cc, + int A_stride, int B_stride, int C_stride) {{ + for (int i = 0; i < {M}; i++) {{ + for (int j = 0; j < {N}; j++) {{ + int32_t sum = 0; + for (int l = 0; l < {K}; l++) {{ + sum += (int32_t) aa[i*A_stride + l] * (int32_t) bb[j*B_stride + l]; + }} + cc[i*C_stride + j] += sum; }} }} + return 0; +}} + +#ifdef __cplusplus +extern "C" +#endif +__STATIC_FORCEINLINE int32_t gemm16_{M}x{K}x{N}_update_{uniq_id}( + int16_t *aa, int16_t *bb, int32_t *cc, + int A_stride, int B_stride, int C_stride) {{ + if ( {M} < 2 || {N} < 2 ) + return gemm16_{M}x{K}x{N}_update_loop_{uniq_id}(aa, bb, cc, A_stride, 
B_stride, C_stride); for (int i = 0; i < {M}; i++) {{ for (int j = 0; j < {N}; j++) {{ + int32_t *aa_ptr = (int32_t *) &aa[i*A_stride]; + int32_t *bb_ptr = (int32_t *) &bb[j*B_stride]; + int32_t sum = 0; for (int l = 0; l < {K} / 2; l++) {{ - sum = __SMLAD( - *((int32_t*) &aa_pad[i*{K} + l*2]), - *((int32_t*) &bb_pad[j*{K} + l*2]), - sum); + sum = __SMLAD(*aa_ptr, *bb_ptr, sum); + ++ aa_ptr; ++ bb_ptr; }} cc[i*C_stride + j] += sum; }} }} + if ( {K} % 2 != 0 ) + gemm16_{M}x{N}_update_rest_{uniq_id}({K}, aa, bb, cc, A_stride, B_stride, C_stride); + return 0; }} + + #ifdef __cplusplus extern "C" #endif diff --git a/tests/micro/testdata/armv7m/digit-2.jpg b/tests/micro/testdata/armv7m/digit-2.jpg new file mode 100644 index 0000000000000000000000000000000000000000..b709a206b8d776215dcaa78643b22fe628b3c43a GIT binary patch literal 572 zcmV-C0>l0P*#F=F5K2Z#MgRc;000310RRC1+Wgv=4-_35A08bV92_7dE+-%&EF&BoC^soAFflYVG#@89JvcHvE;BST|G)qX2ml-a z9036l0RO}Q9{>OW1pxs80RaI300000000010s{mE1_uZU3Jd?l0JRVR0s#X90t5pE z1q1{D00Dgg0s{a95d{(Xb($mz{*4NnC+Tr5k)}+G+9pM!N9a z+Fgtq;Ufks4k5G6O=vOe_>A$lT&9{hIqq>&me#H&a?UfhKQ?v%>I1(TC zPo;Q8dSAiM82D?!`n~+#AhvSzq6>Sai_G#i?k&erpO=siPeWdhbMfE9x=i;{+Ft7# z+e`yT6p%#>F!kUap}qMH^{zG#jmxgXeWv)|M1xS$?n4;X*+RSr2LNRA&OU5{I`h)J Keu^lfzyI08i0m@} literal 0 HcmV?d00001 diff --git a/tests/micro/testdata/armv7m/mnist_model_quant.tflite b/tests/micro/testdata/armv7m/mnist_model_quant.tflite new file mode 100644 index 0000000000000000000000000000000000000000..371235edccebfbc9ce7c715a980ddf7a6c39ce9c GIT binary patch literal 29888 zcmXWC2b3GvdFVeF49d)4fI;uQ>;l-{mu1;Ri4-YO%{|Xfv6DDGd3o>T<)3$yJ#mXH z%ZV*pwyZ9ZqDYFpbIIN17B08WE8+M)s;RbJ! 
zB>4{wx!aImH2A}Y{%S-1u0f{3TmNnIuOI-dbCXcB_7!|wxi{~xZ@nr$2n66SLtc-d z)iP%J-o48g74Iy*n0oge>C}lUtd6D|TO#kH5}dZyP!85{Is~UqQS}cj*EJ}v>ya_!Uph=u`Yb`n)ds1N57GJdHhq1 zee&44xs$!U&zi44GQ48j420`#yD#r8Sd}WyYnvN)Z#~jZ!ZK&PUdz=nOPFQRtkT9Qnf=`_=bCXv;Zd zN{wLysoB9+!}w$8mo(AAH6i}2xY;uWC-enpg~dC0v>5{DaGBa8SRp zK5sNk%RAd)+rFq@e zTkJuu#2sHcZic%(7<1trg*tTMHSo!;D{WZrIS;cqZ=&*A&#aE$G#}^W4{ba%2*sJ) z{MR(s?|B{;7LWY;>CB$+xy0PWs-*YTX-V$lboY~rX*(%m=$O4TOXpcAg@fzd2-O&HF!Q}??jk<3(=A}3K+-#J;*_iKQ zLx0rJUu%44Q={$G#yrnA%D+EQJH0I0D3$@+Du0o~ zByPZoPDB%-tkMMxZP5rGy*?=S`N2avzg$}HN$h~#p_;fJr$dk~uvJIN{>R0rV2Xc` zELMY51tp~`LScp(5K)SZsx3hA2BpPm!V60HQI%(LeGBtzWP4Q0UVkXY>#b>M-Os6z z0~9uSkmAmXc*>f1Ssq)HCygckobW^hfzuHwu8#4txnSVw8PQe~r!30-5Wbim|E6xK zWn1%c^Y#Cm`fhCK(A_WW^R#~abv0)jMRk!pUhEfCIOVFj<*A0k3)Wgn_M0I!?u5%6 zGR8{Ac`2z9OC}?Pj{ve9M-fX__cr4Nj`zG@91oN=nvZmDt+%!C+7mHXS`D5aO|u3v zH-}iY`v$S7kXKgFn8?pBVWlW25b#1ZKpcroOw3hT^l}c%hr9w01j@_0*1CGgG}?IxY(0}y79k4)v@DKnt$fB$eN}*f3;JsVG5o-f!UG>j}(m| z<3ko!fmV3nW5lu~jRHNUT0?AQrk5>_cChu+MwK2km|Nk;x3m;mtowC6Dfrr{PE&K~ z2?Zsa&b=sR_F3Ft+vvQLKK1M*i?fy5S1U4lr~+oaqu)?0f1A^MxKxv&%nUuQ%-qRM z!Qs|WoY~&APQz&XKk9hwd(gIA2b9gZht-fcUYA!{%$9cSNM8Mhi%S(B?jR3Mn9fOt zE0;QtynH6#{425$V`SMup>m{ndVL#K&z(L8-zi^w_$n{BnSD}xZ`}gTvs@4oFEPc; z3v6w`mI`0RSF_UYZmM=TxWLHfR%{Pu9ufnuEJYaT#Zl>6Y1_UK-*)DFFN46heD`lI z&n`^3WV5NzDmNcr>3b|IZ9a4w<=4fG+*?aCpOZH3W82l#{^{x%aaOg?Oy}=CECmiC z`Rz*xv=??2d9?#4++X#|UTqc7`=4t+#PjzC;NApWx5gDq2ci}oN2xFhF92lKsB?h{ zAQiQ`D!GkrFK8F%L`EF%Gl!2dM!eW*{tNPUDh-_H&qF&!Y1hFousV?D+i_taA~lsz z{gMFiGUe4Pu5`XSx9!T^i>D=8=_oBFWt)sWlw6GSBsI>DQL`{{E7dlS#AWo{tGR;9 z8~3%ute}>BCC?r(I-Nnl$fOg{nK^pJ=}}KB*BOJn-WQ)jINj5Aretm9c2r_75UPw|{jsNSSuXO2bk@Ozr5y7SHbtI98ST{#DS$^#gZKE^62^I6z=izJ2)kmIl zftN3Aw8H$`p5t-hbG){HVoNDPIipU`!8uw!E5@^AgdIio02KD8U=$;g94e1m6443; zo+yza>2#Q&vM6Cnpgf8!@Ypdlp2af7Dz%=&tI_}+*5hWT7=D}&#sli%TGgF`F(!jk zFZq~Zo)^TY+&EOur4d>d^+Bv8LTB)53=zc7@>y7&Dr8_ps?25-NjSl#)$=iW6~S?Y z$A%ySAlT@o(V(Oqkk+O31Waj)YD6%z#-(U@RQ{TmOW`JD%vCCp;U$n2MyU{T`&T(s 
zYNmqJ0aC|+X?Xyr7is}Yg@(BI%2>r+qSA0A4SCDeZL*SKQS)Sxzeg{olWCoxtg>Yb znGBP2r1kg-cDrEUTN-&AYWk-VO6s}%fmDa;c*U5~Q9;skq_tUW>oOEvt(8Pu$#%1{ zBRde0=z$T18xSVE0;noscVy~;lCr?{(=XqrhsE*a0nqpFP;9s6&=M70xzzX(1_6Ct zr|99}db3dABz-EIa+8zbWEOnfYC*wZ(sxy~VuZiu;S$xz`mQ{2ajl%wai>MWovPg) z;w5YG_>9;WUj@VNA-pQ|izy>%e%_*HX*oj{fO6M5&XZ8QraOw%Q|ClO2J1z=5)CM1dPe?jft#g&~H++%~ z?y3Bu+VjmttBF^G={p!xbTm}Ks34B>d!#%@kXKW(<7+Io9U*oGPz!?(sY6L1rc5%? zZV_HD$A~N@qpqtNK*E<*CUQoeJi~_h9%zZp5PDM_I=mI8(U`PO2&~e=0L&tx6U?H_ zJCLDZ;L>E3Zq=sAD6$tOsT!!PD~Rh44W%3uldY3O4@-3E-h5C!1*TZ|{1hwvYmB1A^pO@=jKcIK&zI-6y+6IbQDoaqF*R>{y4x@ABnp|mheqzu!LxT`@Bd{r9C^cQN5Fa(Iv%+n! zD%-O?=iy*5!H(vG6?OcoLF(G+7&X-RSeNRDG%&ejUDNPtjid8%p_J5atxLHJF(zKC z^b2{#;YyuZq9={Wf{0o5C)h5zH)0hA8)t_}daU_q>1@4#cOKc%C~F}ZN{Qz?sSXyI z7Bd(NwVXsPDYN*3N;D0yo6))+%D^0KJt2+9Awrr1A)p$nQnSY520I*+h$*P6SsYHU zc0e#!!>ks#57|^O%O9Z!v9}cD3vA+(3X)y2ZYmSf-uXb`h2nic z?GZ>n;ZGbO)@;C`*?rgUr(@f{*siSKy7V4(T{v?dmiAwJuJ)PZY-jh74Mi#U$Bv%= z-P?k;(Bsc`CeHr5`=i@T;Ya6YF8{~aXY7^f4sGuwb-vp4h5UZ(xpkxaSC@Xg^xgyi zep5mSEQY>#zo7rh+%Hz)iO3zrCpqz(HS%$gxN)FX2F~r`-+SVoxcl5@j*=jh#|>A@%1c6rOcPbSKEz0hiO7w%B>8ciy$5Gc{a zf2yXn(TiQp+FJx^VPLU}dg};%?HdP&L|!#FpB+*+acZ{v{mzcW1T5`}^)0Kzo~oZG z5JkxKX6lMqoHltLyhB}O;R#Vw^-=zO@WsV+wCM;n^#X&eB-Dlc{`?&$rWAHg;?KOu zs9TC1`AvbpNwE1x88u^ zMTOKMrCSGBFoMuIJRjey7uV!pozy>i{t99Ke&xF%Q+Xu})bvdim38I)%@(+=KJHv10^jbg(#cIHzV{nwJP6yqbm)9yb3J)dWM zSFmOKRlCX6R9R>bb{jvMbH}#B4~oZjZuBX4Pd_46Nz0L+LbG<<;#k{O~3uc)6U}68+PfCpL;2}>wjNWz7aVEYx{i4dq>vJ zt~|qS(I0&uJ9lM-mU*`up~P;RZ$M_fua;f&;||5I#lLXBoqE`G?6cUT4-fvjP`-Fn^^@t(mC(wT z-awa_15aCJ{C6&QeUM|YH(Z}nC|8Y(;twV?fl?~9+}>m5*M|HLO5hND1sB${q;CZ*}N9W%YFbobJ=8+b9x2J%d9 zPE@Q!wLB^$NX8*Yof47NDh@t1j6B3qlFO#=30we8sgC>UjB*D`N#}_z1SwVOxinSE zO!1)_qoVcVxFDY01qzvJVVjZXRN;De^Z{imdkmn34;QRD7QLxt$|Eg2EfWESO-oGv z+zPF@CzsVbNUfmO3w6Z0SsGoN6c88a5)q4ArRi`#omt~k2_<)+s=V)1IiB~vN~&ON z2!DQa^eg+K%il{@aRCj|6Z`3LI@*k-H6;sC4)XGl*DeCQy^~Fne^;tVdN-NOPpgDNchoBUt%_kf4zx=G*3F*A@q$_;y~KC)XW3Z-ot0 zC+c1Gl#dcUL+2!d%!PqiQ9c&4ZBn-sPIq-F2S*ed#fGe&c2IGy&0nJAqhZ)Z$@>V1 
z`O?^nVQutIsjV*v7dy6kifSUx=?j4A{zTFe(sQeOY0iRomt=IS`wyHVMP3MSW<+E3px1G03Isk`Se0EEICR4;gTwP`_I+`ob(M z{n|bm7XI)w$4DGB3G43}dU+0|sDJ5pl&xAe^xlx3JV0w^g}UqdVS`uRw1}hEd(*fs z$d>36=_Y)<0~U&Uwf84N)Wy|pwn{8dKV|Lsc6`=KbBEnsi?mbDtMu2`tiTA~6 z%)7$c!H5JqP!cEV=2`*T9pIbjy!8pfJ68RVN?>B-!F9iFd2a(-&lO=_z4$qG52)W+ zc9M=UC}}r&- z;dlXbbAK>UtvaNIaJ95Ov6uIVTmt-ojT2^U1fNof&KNGrh1aTLMNbx1yHRsc>N~Pm z$jbo=Q6?k5 zAzIvC%?s)AXfeH!3IU~hPFk2tQR@>l7oS7;5*}~7=#(OBcyyN24I zXTLa5iSows3)!-GzskN~-KBMCm?P) z3q!j*O99gw9vxrPYYe&VphBowuIGK;*~ogXi7CXUr^wmHxt%oZ*2>}KJq4x2OXo_r zx8b~yo05y_n;(@qO-V43-44|J9&sCNGB~Q84}5r~hLzYQ?BQs3N?B@G=3OSTYqqe( zE-LeJ@?znQ zkCyNGZnM>XK2>u^FnlkyJ@duZbvDtP2Z1`*sY!&>T9Z1dweJLtDLFEaKs9G(YlN~%(Dfl!vU`1tGem2nJmb0m+V6hzh^t$A zZVg||RU+QUTvw9DEIVn6#U-L@+VP)_RhHr|HWP^Pk5abska8%$L+}%GQM_7DGn*h5 z(m5SNLqUqZmL@Y)O26L2GFIhvdr)j%5lqqr&CJ;M-QCxDzB}?oJol@630t_%z1jY7 zq&IhjWTHs-)qR z;)!BCtnKhxQ+rM#Z%?&K87z~4WbeiQS~(NL3v@%w!JeYq29Tel(VEg+ZupF0Q!=30G2}Etg5& z>Vg8I!JqA*$n5}4O>R|O{2Wp6tx{aFi%8UB*I#94r}=C`y2Q*~D@jta0FTWnjw)1b z(BX83`?6Av$Ss#*-n=Ell4bT3jG>Xk>ljp~^d+zg)FkUibhvacoggUN0IlRYHMDQ& zG!6L0lvOp$lyKF%!&-EjaUgE}B5uCF2fYmi>^I}?CC4=~w(8&8(AoIW&Cq*V_ic}` zNP1U(>snsAWPiF*trx1Gyx52FS@rT9KLcR3ar_e*6d`9_N$Bf8=P zi~U?j5UrPMzdTlBI0*qvjk9M|MheA_JHy)sHOPLNMv+3CmnF7VWKg^`TRm)V)uAp9 zk=9vIa298pMU=9GTvhI8KJjcO`Q8Xl@ii>cI@fQGA^%JOmv?}j1@4u(pZbwib%qE{6@(>B$et{f$4 z43a}+OsK5dp2Fo`7B2Q$0A=(@?gAxjP)${%$=#%lIaDr0Yc%guumD88>@w(9XBGrp zNn)FwV$nUpOshmx=7)9-jhp$J;H_v~IkRvwdE&%?@XG;l3$Vr7DPo z*~Wuhev-r(5j@;Zw1^l%3g3Bb7Ca`0EPA2T)3yS484IBRWyPE|s%tP-6lEDcM$qVP z*7ZbDzv*p;yh^Edz;Pjk&d87s0WagMl_R=`H8BZGavh#L?+~Gdj1l=Nrq85re9*Mq z@~#0iEowP!{USNZS+F0G;}T+hKT_cX&9~yMN=2=Z#h;Dq32|1Lta5NkK-*81T2XbU*3?X8HJ$)S>TU|QRi7Jry;Y^5 zH?Nw4*3Q|%@WH&Z2`Y4RM9>K*rNjs)NA{N$d!hi1l@`%MC{3{db)cnsaDI-t$4N>R z#HAeK%pm*sRGTUX4--n&wwJYY;)mNM!I6OQ?$qW-UTa8eTexd#Hjo2w*N!MRZJ>jd zduQEes}{#o?EJd@V)mgX*EO5DBK>VUeKs2_U%u^p(xl>`AIHS3jn5vkrYrLk&37_b zWaENB>AEtk63<)ab9i(L)u5&av%3_u%$nvuaZ@BC9^$jnsr=PB58r*O>xPUmF|$|r 
zoV)|sZZv(E`49xPztL?PoL9ngPS7sJ=4ZF&BHBgUxqPdJ=U>~Nd$3a5yfUw?oC6(_ z2f#yc>QFpAxwAB%ymv%ENard%beSJl~!TzOi$MlCaC%tG;a@v{rm{-xj|V9!tnB=e$hNn3EE9L2|Vu zU(9>4O?qJ+5700Bvk|pEeDSG&0oau{6)GE0wX!LnN{R~9Fc`<+J*NN-Acr0F>^8kQU*Jm~_9MHL6XNa7}338LfsHxK<>p zdp8hY+Ov-?K}s9aO0lidxTUH@-pQ$4lpg%M>HP5CRK|9qO~d zIIl+UIxJQZW#nCbAT*Fyhd>V3J8m)O~@3(^D`mfio}Vgb^wYkIekl%$uiLl z67XPF0NOsg8)lvwHm-96f6}_^x?As=@P!Ybi-t=Bsz?Q7N>jT=ZQp?|?Q`l*0D~)H zD__zVt)!V7mQsi*s-@%yrFut-uRP$05}a)Lm9g=${Yj-*=GW=jY!geD=W+c$n(@x$ zeB@SrbJ`4S>RvmyN-Z$!d4|`6^oTERv#vi>P z?3h%9KRj0Z#Cm+>IJbN~u-Lop^ljY!oj1D$j|6M`SG#-PyRu5T_|cP|-_Byl?|AIOR$zE6 zb+@TF+Vr~%?K_FS)!I$h#~)o(^!{%Avrm2{*J-IJOy!#^Qm_eQnb`swpIL7_6^UxK zBmkMiEDiq5UM~$9Lu~?RR8mT5NMP82OVhFHk5Z&Mj7@cNAbDw$GBa5={9OkBC zhvmw6jd4nO;F4N-UKBbkwb#om?mDf>SM%b$RYIIDR0AwNMYVqI=@pilQNo!yMk5vv zS8uREsc z*kh-2t21$q+8Sk4KHRo)iuH|4YBOr%l_@rot!}FZ08x6h?s+!t7jD`pFj}ZT?(ON_ zl1GXYluU$-rLv!?=O(@A=tlM@Tk?#pZhtC_*j#d$pK>e}qzQn~ zIt4v{Z&TH7p4Q`%p%Vv`UkEAN-Fc~?$%&b2gtX|Daw&yFis9vQOO{*HAE#(mzpR!E z{)0Rh3j$qy_X(2Q{FOsr{yd%uX4WOms%lw1%K)a9hJUuenx<8z)?qs~;?p)|$(g)p z)<3sl=;j|LhzcaeI9?qfV#`x`VfiSJ2Y(8MQ;I!9pun%@B&@>%LscXQ9<8c-8WD+j zM=Ata3$UXg;SW9zBs_2bW@Xg+uo0Y})Z|o8+^H5^eTkymurrp_c&oUD_J!BLYAGq{ zP6(TW0Z9@Q)F0EJQ9&?f+mfyTGH)*}w!RiZJk3y{#7t(PFYPn&yKY`R>|yK#XG*NS zCnAC7A$W@w$(AASS;{VN3_~9Jp|grfvP4S`t=3Tw{ju_L8z2FB?*TBSrq1}=#yd?j^{!YHyWV2Z8W=DyQ~Ddh^-&w z<9Qoun4<_ASzJsl$hA@Pm}HhyA0+YJeG{cBcaO|v#oI3W?l(8uQFlC{ZqHholus4- zScG5aN*(T9!F}jCzW#817#rK{;&!X$y9d?|{rLXf-XB>py7Qz$VoIIiMS9PEJEIPQ zwtejwCsnIGX@`_R!r3gQK9kKu z-|dfp|9H0U%n1xi)m`lCZJ~T~xl00A>Ab-YDuYE1 zR#0M^gcFRx!BRF7TzhkAXoj~5pD{ZjpgitFd9=?@Db zO2w(h&rk-c%s_b{<+`?LkY|pE1~=sUbWx_}sN0+?PHpV1$#*TaW-d_n&`@b&V6}D8 zp38@@mV%@+>}OLVQX!_phr0>m2?(>6BR{{iuIT4?PWo?Zs$;uqEF5V>PMZo5Lth;9S&+joTW31 z$G#Ic-*yO-xwS;m({cBi$vcPIhd=w$MVz{UeAse77<|O}ynJ#NBj@h=<8C%C&ODf;VnwB(;nsrNfohQ`!2L8&jK`(t z6g`R>6VBoEbTVI|q}e3OYGleG=q9U-y5g=V7E_=$0lRKV9zZ&SE$jRFf)cMF>0S?J 
z2|5wv%l32xO(A1IP1ZIJ!OY<6a3yG_!L%yK+9PCT@L5nOW$UO#dnUal41-v*dsUcv=rA)gr_FnTJNp+YVI+MrVLg+1^av0+Ov*B`~QHKodLpKjS~TmJgi zsYnY~L#_Y4U`FXiGC&Y6HC3hDveuu^Gc^u8f7H28?M-lw>rGV8zFbZPN0mx&-Xfm2Y*gbFQ~iSEO$n zXf&~Cg0}-p;YgUp+LST|64dowci{0RqYjJQjN}IGQNtHNDJF};dc3#Bp@V`TSDbG` zd}URRthJx`hN$Dv2X}g?r|*8ZX+wM_(aYe=ztzPYel|B{jvg`BOc+n{%%L`3YPkyx z_kv-YIs5s_jq>K5dx~Qc24b5GxX=@MHiL<+@9pzE*MR{+ou5qjPmYC6vpi7r96pHU zn~g+}a}ZR-0)Dapv3yVyE1xKLF(8_bu_mVm)R+!yR_8MnqLjiYgFSuB z#AYR4R5N!@CA|6#Tivrr5zIcdfA#F8go_?z=_JV_LNlvdNgK+yilqlY;R-8E(nFjG zU}g-2^r<+L0n%yaQW>fUd3i&zGuO>o-Dw1do-6vO^KS+1oC|-6Mssg^d`VB|)iar{ zD)jhX8*^s%U)Z@4Tm5|BVST9Hlk0RbbTUvYF!Mw;t!`+FgJ%tcQf?ed7l}ZYxvd_d z3D(PpN^#lvz3k1}b&e}Yt6CU$HcM$l~%S+zRzefkS6EjCu4p{H|2nSb`` zS2k6x0$3es{n|!#jkOT=eK$iv6i-6;SO?c!ddHjARZJDCJF6|RqL`(}HU{VM9#(uG zWMO<|GnEuX%>BNqMnsAVJ{5%nJLuEt6oV`oa}hr~@+Et$)ZdFsK_HwdGLL#aGR(5l zmX__xH+IU?I^))~O?dmfS{gSmTKUkQg_5d9wD}<4sz0R_L8KhZgMOX^gV#?>q@cwr z{KeD0pqZB*_*#D3*)(4|^YoB46?kd`0N7zW9JT=rEl+qhHyOmN|CU~4qzc31ttp*nY6Jb~Tbc=0+RR#rW`d@~9P%^s~ z2=AH%vz(}LnrbU12}isKW(-RLEwt6D+1AJWG)w zB}Xpcz8B-!KdT@f=Yk7@mb{zX4dZ-tHMR&6<25?--a6!6_|dsE9Lk(BMT70>{ z4%wXWmx-q&U)IiU4tv&f1#>7S^ntdUQg@vn)+?hLRuHFE__5WIB&8cpp|*KLIqo66AI9xqB{y~MLhISF8R zZhb(;da18;(AE66(mt`N`*k_Q8K_IY4GVJ=A)L{`B0*ZfVp^_MjBuP~r>VH1U0G4f zNbf^RzS0vll}cI7X6r(~u2b-^DO*oL`&7A zs!+6uMyrkcAz?{Pw8Id?1G`|+Hnm)~&x&b+Ld9F4VRKnK;ZD`@`gqN5_h)vj(r0{s za#x=fG!M45pj37y=ye`n4@SaIl&Gg3d%DQGWcygi`JxU4Ll^ zORVN+zHCwzYDEkwQDarQL=vskcIG~s+^O;yJ0HndV5zmcP}ys{9-Hj+>@xbE8$S;Y zfeRX?X5TU^FwhSjsUPbSHR8nVud4ynOkuH$#bt9} zwpT9UsklC)3+KpPof5@y30wP3~*DN*ijI$KZL~-z&C7yqw&^ zW!|5xEMPZeTJ{Nn*y5F%1aPm6(%F?}nOTy~qNU5|@1!BEeT_dM0kb&x&peqpA0bl%I4LVvPr_I1ucaL7}|**uU%D?s(kYL;H}c=n>$?duv>R)Jlu<0iYy zzp7NkRt1szHjH7>fIOIaHrvB3x;t|0EdAG}^@z!W$8`)w8nZ?i@+{}!ocF0q9a(v? 
zFwaWtaIbiYXf;}@^bKn}I^B)eZWtebf~^~}8y@kKtd*;D?5j}I8XuTGUD%egB&}R) zl}{;{U-f}JjU9o-EoN0p`V4_$1B$=%XFZy|%y&vDF+#TlAlxk2WtxsLXqsxKfEjse z7@Nxny$WNLgJ(>3SY0A4jaxY=1i z{jEYYo|+++bh35*^~0XZ(#bfFsm1KWp1^^dvmI(6sz2;e=(koJtDN>GM;i=L8^5h2 z25YQ5?3AKFg@WbvnNO3ClE9j?Vvp@aR%{9Kb4h7ts}#8(+Z)}2pDq)TC$_FI?bFx{ z(~TLmW2``R6L?5lGpt`by9(@K$>7dX&OM~Lk06VZL86{stzfKW2>4S7dpsZUT>iCv zxwduz&GA{2?uSv0Gc)m}BewC>ln~kM_0Xj|Pj-PVzgm$%XM}?k-pb|b3g_wSJ;sIL z+S9m5zMD*q?XDQ4nX}Vt@yO>ZkBSjDwd$MCE%@K|ezL=gl2pabh zqX<_)&3rz}L(JM32a%IlQzs#6=XE?nM`b0SwceM9!VtskQ8WUDL+%VudkktMSlCa! z-y?3 zIr4e-^mXmL&NjcsIr%|SXvo@@=*neIvT2NW7f$GTKxNaHr-)K~1HSE4A8fpn*k*g6 z!j;olrhQ?EGZbg0--jN*G@o6Ad}oiDhG>sLGz>*W%|#Azsz!Yyms=hya4GaSL(|Gcb2}D2JC%isst8OqCZJV*QCH1XvN%(VR*(^r zQY*I9K?Df`Jf6Jq_C$mqs*&Pqn3dqL;?yFtQ(_{JYdF0WV1Xjx(po9^gbr^wE^M-l}e@5$Y9}+gpfwli}^JoMq$@7fEbhH zIeNqZ6}6&FnaoL>Y8Zo8ElGh$8sa8Vw8Z{>8g>R|Q+R7|k;BmeELELOXV=nrQeC8B zjl~awd6yFrq7k4NU{RRn;IplYj-;Vel~H5HSF4egKH;D_fje5gg7nFFiy@_NvWjOV zQj9dZDjily2Ir%h^G90AJCdCaG8cHQ=+$p;Y{tbWvl;Ou0VKq$LUroI!7ul)l2n>4 zsW8!2^G1zJTL*(lm{0Rs>!#*%$PR~7PpVTEw5C91nlGlo#9p8?`%u*BgvsI$Dyfq5 zNqH<(=-gXr*$H+H_Zf*gLut?=?YBIHJiAB_}Ya}uqCO`N8g?{eo&iCTT0ruXvM;S1w2}^)HBFJ-c44NmIs)Z>qTqGJ^9U3EF z;pYlN*%YP^GUY}dsL7aWoGir8Fjdm(8IsV4b}Zh!M_e;K0&Zs>q>^6l-qnwo2V(Ea0bWsKt_IB(eYn?e_yQ_t)x#srcN zUtnxLVbaMrJapTomd69uA3S?z{t!5S&I!G*9}`o4`f?~A`|0H5yT9t#7}v0RK6;X< z{N}~-zug;|dgY9xsJ{qDyvjlBa=w#Ux>ZfG`jT|RyJ1Dg8Z zO0Sf!f2zw%Klps^)EmCXue|whIDc^|Y4E(2)l{c$cWo#|N#e)5mY==(yIb#^e2vfd zBV+L5k1rHQ5(Uj?_`CP*yu0PA%4=IcJn`lPG-gRvdtZHdy4U<8C_Mv?XrwQ;VdpD9 zz1y$RdT;#O73Ylil;r$r-veO!@!90HFP0Ae;FYxc%w(s2e%hZ>o&KaHGW^jDcIol( zr^fr&kF75M;z-*;7J$`z^9K+`R|cjPPH(^eypGIpwpCX6EUwl@p5{c}-2^;c2*j zzL2HCcIj$LO~70sI6$qoOVLFY`uzvB-AqZluI@|Ga)K}xe#yJ7vC2=71+1Av6s$Kf5VNeeUeg6YA(vALpwI~KLhdX z|KHwshR0Q04R7zgH*L|X_h#*tu8Ul;v21Kp10>jBQyoLF$(N89%#u(-H{F;TFc@QF z4DQ`lvt;$&R?h|B=GrYLu4Rx>=37?ZH2XHNaCl3 zsdg6G6HvO&4TcTSJ~$pCO zQ7XmoEt}+03iA?PhuSr*Xv*woTf5_sh(RO%tsq0$Ev{RT<@c5(i^L_ePF(3CZDwvE 
z<+n9e;mSO_ODvc!r%bGnW@Xv2tqB&d0^%_hEKwjEub`;O($*N&ha?%9*<=hyoTa4q zF)+7AC_>(m>kV>3Hv6PCoOHf*nJzymw4b#P_Y9@zX?+vlI$E-rh}fr{EbPMgJIq!= zD{D06@tz`dnmEIalLsy%?L?RB6k(`GFw%b9ba1MT)9XtgzTD3fXhy$itLO9HwY2uu zd%C_Q!jYCNa&%I9Qo28(Nbr{s)Q*ZCUxrB)UMbGzaKC8!{OzLmwE|jHbs#OtFp13* z8h148U6n;BYq5{o`)!}e;_pdgET1qjOzZDVpV{rBI^98uDC8VS6ssay$_#s^q@OS> zd|O+k%3~}QMn6w)SuEu4NiQU_sz2-;F&|5nsBXwl>6L|kW)43!{&wbhQ{lyvST`*u4QLNz|uHls0ZMBru8*arh*cFmf{6qDE8%yLh-MXPUk@eL(Jv=EUsL z{$Qm#m4Ois40J2FLb*$8nXEij*Q0xnM@@_5;|iTB>Oa1zx|q{mByJ`tdVRj~X}XB~ zEskoe#18O1g$XYIyOkB5Ktq=6(|Pz4P7S-!Fq!i%z0L&1?b9QDO=gm*h!MqGy|zFY zyyB~xa)#SQUan<|PW01+Rk2dzu~U5&nmmc}q>gZzk-e@Vtj&))sIlaTI9^Giko!tv z#jhMzHc7mhHsTaPI$Alc5)aj1%0anv#lQyX~jJS9SqYwP3 zI-_-PIp01-@fAwo8$(FvrwrC2M$X2^LhO|MgwURwpI@4L`*GeWe1u7_OO4`>IP+(c zXD7a!M=Fz5k=ctU@r^~cC9WniW74BDO7giz7Jk~VRTJ=K2y4uS`EIY0uUj!D717ch zi`)gd{gmOw-D*XI;HH(CV>w6~{Q|BqpL>FodAgzKa0Z@s84?uqvF_z(;^TBybGN^4 z8C8>*`6MrDrm*Smtip_up3r#kVM@e5Zz?)})On%!Sh0T)p(dHigesBlbw%(Ll82EH z>rn|lFgh{esiz!wiotk|F>Aoe_|HWv7EC#O2IhbPEY z+WOIeyCy7L#m_Z&cJ_~Wy`&->E-p4$IVw%m5V1`(AL>(cr9okeFXTH*{nAVC&nt*E*v zukTPHLml9S6xJZCk>9EQ);~SX+TdWdpNWO~mMWwyi~b-VKNB5M6ZxVM?2Iv_NNKW+ ztU*TBk-g(mSz%&|uPUT?+85H(tC~v>ReM8W;+O83Mq%|xhxLp}!c&V$-;UWXr5EAr z%}&QSOJwFcM3nmdrp_;75@Lb1Q6=cMC+$ufdpfMfUt(WxLXX=@BOF@Y!fcHju5RZycNR82Kj-n_urOeM~8e^+4 zlgwbAtjwC~cceEAd@veUJ6423g@Q?fA~B&3<+*6K1U;*}Vtw8UNmkpr^7tQ{Dp)+{ z%7uCTA?ebJ0O5h|x(E^Lwt~xJV@ycpjg2UH0&TZ}K!{!EvL$WNp{{~_3|Wv64<_)- zBLW#3U@KR5NnjP+6Ear1!afhTT7Z$`Pn{aNav0M=8V34S-1nvU0NQGAq=XFkZ zv{<#%CkApj*H7Kr7p(A)4UAbbrKg}VYto73~SoL zaKuy5s)+V=ep;-*VN0edO*9yL#8FN|}}f_tT+Mxrjor0M`|7={0)5 zSN|;-5+dwf8_FvunHYh{wCOA@Mi&MGwev6uA}LrfTE)wqSiRLVZv?MwOT;o+~p zL0n-G+enz=U5LzRN%GP{=H36l0%MWHgP~dLkmdjUwJ>)Y`ir3I>|)N zqQs`ZJL}3UFA0<;?G4zzga=O?HnD6NOX@Oq-kqkYqE0PKF5o=CjPOW(^mBgjh;N;6(TTYxzoU(1i(ykS7d_&o;zs?xA=j}y zUo#1J%*DPkLQ7%}Vd{qpT_Z`t#ggeVJ;`oo<4B33z`{7Yfv`H$nZdW`e^<9_X|9^5 zEu1;sf}LsR+5wRvH}?{ zsYJwvbFiZd2)x0;=HZ)42hs)t!z}v%#z^#@qe!cquChC>=rfGe1#x>wVAaNO!t{k? 
z#)yc?<;F?4bp%gV-0J07(xtp%3oUA~Mx-+GWyC7N@$n;hzC7WT@tlcagIR=S+6=r2 zdC}8WEOG52H729~-u;@o7=ts2jIkM)wR_Wg)I&WpBK~rJse2_c+A>xno~Grykw8im zC^&0Siv9ky^kC^3nNt2~dIy0hYNqk{)z0q42y{h0P8lEOiEhp#$m-WC#*Sy6#%D#{ zsvy1)7h)H2+eQYdMeZ{1RbN0)0e3)8I}xM=_zRjbBFKsm@L4p2AaWamgxMI(U4GzK z2&9)-5M(oO?sX&ZPqY}&HV+^OANUdH=MwHSMB12)TJe8bUAj`9)^mKKhg4p5`jGht=$ldl1cln_86&_A!>`4=47aH%9>Jfrwi#;8ZMnz+KGs+mRpYT8)X8_GR|j3>)Fq@F50Op|b1nNfCK#z=`UXSle~NOcbxsNU6D;8W>v#8w=t zE^b!!$1Ce)q)J4VaLSl0df`MWHE0Zo-Sc_YtnJ>8Dc`D&m>g2HDp$ zD`L=wxMC@tWPY>n(bJjptk}%JO6eDVwca(vrk^3XnR7fiCt z5i!2R!pBTRlrclbRGiTt7Dh*89>lH{=c*_^LRv_*Nu6#j?>DoJ3t(Oc8pY&Xp)iY}d({SnWP`nW!K`#HP~R#-X@OV33pj7)x)z zfAa2lJGZUmV)T6HrMjJnNmGHp5F67VN@Pjq0-JKN8`;Q>QOHl4~*rYJAje+7xwBEL-D~vv>^Y zSFRx^)pJ}wFESw{tavB=Fok?c%$!g}9lm0wnp0Sj8z?=*AI%mZrK^?;GZ&mAdlk2Z zGcE+mE8JtJ96S?vnTS&?rG5~*IitIFo}xHkJY$xx)i4%iV!I}w6Nv%nDBl~rt$(od z)4eS_k3Eq5XiK7Xs`;Sn_9l{T5yhCL`GbKLRq)pM<$^A9%LFDmp+$JFn{96CQHeJx zFVA=>+dYu&K)Q5Jd2&w%XF3N@^@*j!f_!6IF4KtTMmf89&M+@lq^*?m376ELTe9%& z3!+%A=+)FTfl99L(FS|5`n)b~{bXAJo5d%5X+8ipUA=CWx06Mz(p2Gjs`FwhqP5%N z3~y1S<+`=X(iHQ5pxKxJKsb;`dcS8JAUKXF(nPC{d@K(=RXRNAlPo>p| z(aJfmg+_!iH5((4-GOfneuB-&$=n#eTN|GC z3pxXqMH5j$MCc{D3XX^gj_TnrDQ??dez-bRHIoSDWk^D-gC!2`f`%`HUg0#IyKJfq zTq)^e-QD=56|TZYPc(6Az)^o@RM||l8wukB6CIaZ%X%@QrPZx&?gdjT?F;5*uR6>M zWA)^?J+D1HkC;tUe#+-bbwO%nfilH=EmM%L#!QMBcvpH*>r4y8-ELv@3bwD<&K;BZ zM`fx2-x?#R{plsRf+dyHqQH$QX)W7Ick;AUzCt}!MDb+g`6UiTB;Dw#T1U;w#qRk3~|9sTS${4hiYLCXA{v@3)~xa;Y~8*^m1TLQIrzC-BFCEb=tNREk0K62O+J-J7<<6lhXn&qT&F}z$P z!sOo6;@OuTuBe~#o&>I?jt+IdWz@g$@jFjvQKZw6`74*nl%u4XGp+xa_%i1TpL=B9 z$=45_y|Rzgb7uGJd#I}{)|-)BtT1MtxzKCq2u}5zIr0M;#7j(^Jv@c=2QV|S@K^QN zFSCb>4XwQPn5kq!aK>dA#IzpHw3p&mr`@JsNY55e)k#^+l#b+R9g{S{z1b+H6*#2C zdDy+0lC-qd>|?h!8hWNpGY3446U7twv~R{3$`i6yUS#qLc069j6$!gJSCnO=2w@;@ zNnFnxF!C7m6Kv!qs&2L9M%oj{mx1?PDhu_uMN z)2w>6Iapz{5~aO8BwN_SdLoE%tWj`C-OgOxQXkHzr0SF7oB~>HbxzSn-dhZbaUD{0 zkMC6Zkc#y!OOu$4yGQLEkr2MEj&{6j&~cCv>~m5EQ+d*YoV8rOp9VCX+wj{fn@q6;!*cU3w%+f$}7u&cq_4j0V 
ztG_Zb#SiN9+<1{9QcaKY>rZ2J_Eb4*Ma(60_9H^q{B)N(#d1=Z^io`vztI+pzK-uq zPRnUSqrSHNLqw-mN9oj>6Us_fHL~ihrJMv|C~TXGLL0CP4BV<4eR}xrd)gPYrOd z+quy5(3U&XpVc=5Pq?%Gwn;!Psyph(7pQ{NfX{W{Y@}z4YO2!1wl! zEt`vG{dEgKJ_f`3r?+p^*Eb8~p!o`bpMmr4K7a=RqP%~<>49zF149G27349{a&XSw z2v7mA7v#SN@P2?90MR;)!1pey6F36T%0zvE15ykR;1XcFt2$sP9FRuivu&^<$Q%vo zUz`gN#(rF;@rG1+^pECuD$mpg+EE=Z|!F9OPdsAB_W; z<%8Bo`GH}EdGLq&ST#t`>2N2=2g#r4fYw2EKy8EC6ZPwi`c6jc0U%a@00MYb9@_un zf7%q4`%5~_^&uV5Tsv;)KnhwBfZ%pa)c320CN{bj)Ue z7GRhJFwp$~cL1RiK)3@iyba(e;PVJT)PDDaI~)o4R0FI5h{^zfUj_nHUtn4U)u|TH zp?ad_s9rk(&H1(KzD6>P#@eVfTCGms%3v&AOrF3 zTmJAE+M-Qci|^gKX~&K&+ln`A*?#|~N4ESRw+7IC^S#_05MK!povQ$-n?Zou8Rd=Y zh++-E_x}g-(*fNz@_)bSPj@}?@WWdl*mhsp=DW&@*KGRJI*`1|e~$MY{~B=jhuUFI zA9UUWpw@$6P9JnEqF4j40MP%MJ^l*luF>Znu;R`tvSG{C$KanUSn|N8?Ta^W{@F5g zy)Ik!vqj%m1BTwT`Ib$OKCtQTty^w*r1;S-+aG@H(R;Sc>W{YbTK!AFedM+6Mb|d~ zKe9i%R-mW`r~vf;uDu6uIXwU0+Q0Ot_P>_@g@4fpT|)r;$o}ZsfT9}Uf&YO%KX3n1 zV5gt8e`)bA+rPB<7w!MgYQJj#YxV!e{bT#1>okBL*&oeA5ncP<2lW5W{@>mcgVliU znql>$@&AMMe;EJQ@~`pySfB6LXX+Wib8h^j^BYC9tN_senm&I4bl2!}b^QN=^?%v+ zzi9cnD!*#?YxT|nckFZHei_gY?GpghS`f^&6W#Bih|U=Sphw$@t_Ns(ZrwxU`lp_P zQCsY|cWcof9@_*~y~n{S_k(?}x}k>YfQhtpWLte{r^A*x4ol#ZP}a9t%R4L`?j5i z+KnGWryt)7MJV{H_q$$%{&MFIsN&&+&|Nou3Rz$M7<#N^7qq?Epfff<3%&8oz0mqC zUAhpt3)+>bhu*E*4Jif&Aok-AK&POs(C$5x;yFdZzhB=s{qEYv$EI_XNNE=qdZf z1rY3Me(ljeyN;lC|0DQ56I}-~z}!U7ApoclhzG#;kmx+6{AZrd8s&d`Py-CGEU^~W z%jd($9mVkVFBZTvd)2UM`x$uG6YJr>EUbXHUfBe%Gu{cOZ+f8mH=AnV-QF^I`Fpol zcTsME52>z)O)H;+|FH0W_(9_~_@11z@b>;yux?`sT=v_?VRz$0Fm(AoIQ^aVFi!Y! 
z^|Nm*fwA{w!4GQRsisW44ZpUn8V-hk3(srW1CyS~fybP;dbsVsHo|;N z2K?6cU2w8y3%r)M2A;y-0Pk6|1Ww*~GkoOF#_BZbeE7{jG2lHnT!f$F6~POXPr*;I z)>em~)WCh0m%;U?HL&`HyWjze4u1ZIZ7}z84hU8b*D zckyE^RK5<;J-2p7m$?Q9ZL`p!?J+hqL!8tdx!Ixn`pQM!7s+}ZDM^E#e57BuZaoot z?uIkEKR=Fv9{DsKI{x7yo%D=NckfZNZu;DmPH;<;u4A2BH?Ld`DL+r>a*uK#*$FXZ zs4j(`zHU&*Jx+n1d30F!^Xu?G_wqk;&icPP0?e+%XuDJ3e^{a8Io44 zFOWaG4*zGI%o;@vLdU{e;n)7O7QW^Azrh>-_B8yvC$_@xK+nK&+N<#G6>q=~i|&K( z`Q#7qZ?r4m&+l6WuaUh1zq#-Zn7#IS`1Er%@E`B^E4*pt=dkLPa`?T}!*IvML+}kl z_ruS%t%lz@xC72@`4m2!t%n!+-+=E|?t%5?N8v2>Z{f8~FT(HMde<+k19Rix>N_Gb87STVYd~e}{$#g~_{0uf`KWMfsoz9v3L!3P3=Z{kxifjf}ZFH6UdB4wM+>iPjT<6YCFmw`j@L|J8}|tpK\n") + header_file.write("#include \n") + header_file.write("#include \n") + header_file.write(f"const size_t {tensor_name}_len = {npy_data.size};\n") + + if npy_data.dtype == "int8": + header_file.write(f"int8_t {tensor_name}[] =") + elif npy_data.dtype == "int32": + header_file.write(f"int32_t {tensor_name}[] = ") + elif npy_data.dtype == "uint8": + header_file.write(f"uint8_t {tensor_name}[] = ") + elif npy_data.dtype == "float32": + header_file.write(f"float {tensor_name}[] = ") + else: + raise ValueError("Data type not expected.") + + header_file.write("{") + for i in np.ndindex(npy_data.shape): + header_file.write(f"{npy_data[i]}, ") + header_file.write("};\n\n") + + header_file_bytes = bytes(header_file.getvalue(), "utf-8") + raw_path = pathlib.Path(output_path) / f"{tensor_name}.h" + ti = tarfile.TarInfo(name=str(raw_path)) + ti.size = len(header_file_bytes) + ti.mode = 0o644 + ti.type = tarfile.REGTYPE + tar_file.addfile(ti, io.BytesIO(header_file_bytes)) + + + + +def _open_tflite_model(model_path: str): + # Import TFLite model + tflite_model_buf = open(model_path, "rb").read() + try: + import tflite + + tflite_model = tflite.Model.GetRootAsModel(tflite_model_buf, 0) + except AttributeError: + import tflite.Model + + tflite_model = tflite.Model.Model.GetRootAsModel(tflite_model_buf, 0) + + relay_mod, params = 
relay.frontend.from_tflite(tflite_model) + + return relay_mod, params + +def _get_test_data(testdata_dir): + + from PIL import Image + + image_files = ["digit-2.jpg"] + + for file in image_files: + img = Image.open(testdata_dir / file).resize((28, 28)) + img = np.asarray(img).astype("uint8") + sample = np.reshape(img, -1) + + output_shape = (1, 10) + + return sample, output_shape + + +def _apply_desired_layout_isa(relay_mod): + + desired_layouts = {'qnn.conv2d': ['NHWC', 'HWOI'], 'nn.conv2d': ['NHWC', 'HWOI']} + + seq = tvm.transform.Sequential([relay.transform.RemoveUnusedFunctions(), relay.transform.ConvertLayout(desired_layouts)]) + + with tvm.transform.PassContext(opt_level=3): + return seq(relay_mod) + +def _apply_desired_layout_no_isa(relay_mod): + + desired_layouts = {'qnn.conv2d': ['NHWC', 'HWIO'], 'nn.conv2d': ['NHWC', 'HWIO']} + + seq = tvm.transform.Sequential([relay.transform.RemoveUnusedFunctions(), relay.transform.ConvertLayout(desired_layouts)]) + + with tvm.transform.PassContext(opt_level=3): + return seq(relay_mod) + +def _generate_project(temp_dir, board, west_cmd, lowered, build_config, sample, output_shape): + + with tempfile.NamedTemporaryFile() as tar_temp_file: + with tarfile.open(tar_temp_file.name, "w:gz") as tf: + with tempfile.TemporaryDirectory() as tar_temp_dir: + model_files_path = os.path.join(tar_temp_dir, "include") + os.mkdir(model_files_path) + header_path = generate_c_interface_header( + lowered.libmod_name, ["input_1"], ["output"], model_files_path + ) + tf.add(header_path, arcname=os.path.relpath(header_path, tar_temp_dir)) + + _create_header_file("input_data", sample, "include", tf) + _create_header_file("output_data", np.zeros(shape=output_shape, dtype="float32"), "include", tf) + + project, _ = _build_project( + temp_dir, + board, + west_cmd, + lowered, + build_config, + extra_files_tar=tar_temp_file.name, + ) + + return project + + +def _run_model(temp_dir, board, west_cmd, lowered, build_config, sample, output_shape): + + 
project = _generate_project(temp_dir, board, west_cmd, lowered, build_config, sample, output_shape) + + project.flash() + + with project.transport() as transport: + timeout_read = 60 + # _get_message(transport, "#wakeup", timeout_sec=timeout_read) + transport.write(b"start\n", timeout_sec=5) + result_line = _get_message(transport, "#result", timeout_sec=timeout_read) + + result_line = result_line.strip("\n") + result_line = result_line.split(":") + result = int(result_line[1]) + time = int(result_line[2]) + logging.info(f"Result: {result}\ttime: {time} ms") + + return result, time + + +@tvm.testing.requires_micro +def test_armv7m_intrinsic(temp_dir, board, west_cmd, tvm_debug): + """Testing a ARM v7m SIMD extension.""" + + if board not in [ + "nrf5340dk", + "stm32f746xx_disco", + "stm32f746xx_nucleo", + "stm32l4r5zi_nucleo", + ]: + pytest.skip(msg="Platform does not support ARM v7m SIMD extenion.") + + model = conftest.ZEPHYR_BOARDS[board] + + build_config = {"debug": tvm_debug} + + this_dir = pathlib.Path(os.path.dirname(__file__)) + testdata_dir = this_dir.parent / "testdata" / "armv7m" + + relay_mod, params = _open_tflite_model(testdata_dir / "mnist_model_quant.tflite") + + sample, output_shape = _get_test_data(testdata_dir) + + relay_mod_isa = _apply_desired_layout_isa(relay_mod) + # kernel layout "HWIO" is not supported by arm_cpu SIMD extension (see tvm\python\relay\op\strategy\arm_cpu.py) + relay_mod_no_isa = _apply_desired_layout_no_isa(relay_mod) + + target = tvm.target.target.micro( + model, options=["-keys=arm_cpu,cpu", "-link-params=1", "--executor=aot", "--unpacked-api=1", "--interface-api=c"] + ) + + temp_dir_isa = temp_dir / "isa" + temp_dir_no_isa = temp_dir / "noisa" + + os.makedirs(temp_dir_isa, exist_ok=True) + os.makedirs(temp_dir_no_isa, exist_ok=True) + + with tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}): + lowered_isa = relay.build(relay_mod_isa, target, params=params) + lowered_no_isa = 
relay.build(relay_mod_no_isa, target, params=params) + result_isa, time_isa = _run_model(temp_dir_isa, board, west_cmd, lowered_isa, build_config, sample, output_shape) + result_no_isa, time_no_isa = _run_model(temp_dir_no_isa, board, west_cmd, lowered_no_isa, build_config, sample, output_shape) + + assert result_no_isa == result_isa + assert time_no_isa > time_isa + + +if __name__ == "__main__": + sys.exit(pytest.main([__file__] + sys.argv[1:])) From 0ea201e05c7e0eb96c8d5b78f7fd81918638c60d Mon Sep 17 00:00:00 2001 From: Sergey Smirnov Date: Sun, 12 Sep 2021 11:25:30 -0700 Subject: [PATCH 02/22] [microTVM] Update support for ARMv7m intrinsic - Improved implementaion of gemm function for conv2d - Removed %4 restriction for channels - Added test case to verify SMLAD intrinsic speed acceleration Signed-off-by: Sergey Smirnov --- .../zephyr/base-box/base_box_test.sh | 2 + python/tvm/relay/op/strategy/arm_cpu.py | 3 +- .../arm_cpu/cortex_m7/conv2d/direct_simd.py | 10 +- .../arm_cpu/cortex_m7/micro_kernel/gemm.py | 326 ++++++++++++++++-- tests/micro/testdata/armv7m/digit-2.jpg | Bin 0 -> 572 bytes .../testdata/armv7m/mnist_model_quant.tflite | Bin 0 -> 29888 bytes tests/micro/zephyr/test_zephyr_armv7m.py | 293 ++++++++++++++++ 7 files changed, 603 insertions(+), 31 deletions(-) create mode 100644 tests/micro/testdata/armv7m/digit-2.jpg create mode 100644 tests/micro/testdata/armv7m/mnist_model_quant.tflite create mode 100644 tests/micro/zephyr/test_zephyr_armv7m.py diff --git a/apps/microtvm/reference-vm/zephyr/base-box/base_box_test.sh b/apps/microtvm/reference-vm/zephyr/base-box/base_box_test.sh index 2a023b520b01..fd8bb7a2c24a 100755 --- a/apps/microtvm/reference-vm/zephyr/base-box/base_box_test.sh +++ b/apps/microtvm/reference-vm/zephyr/base-box/base_box_test.sh @@ -37,3 +37,5 @@ if [ $board == "stm32f746xx" ]; then else pytest tests/micro/zephyr/test_zephyr_aot.py --zephyr-board=${board} fi + +pytest tests/micro/zephyr/test_zephyr_armv7m.py --zephyr-board=${board} 
\ No newline at end of file diff --git a/python/tvm/relay/op/strategy/arm_cpu.py b/python/tvm/relay/op/strategy/arm_cpu.py index 005eae68b8b7..f3d0c1af0a85 100644 --- a/python/tvm/relay/op/strategy/arm_cpu.py +++ b/python/tvm/relay/op/strategy/arm_cpu.py @@ -127,8 +127,7 @@ def conv2d_strategy_arm_cpu(attrs, inputs, out_type, target): name="conv2d_hwcn.generic", ) elif layout == "NHWC": - channels = data.shape[3] - if "SMLAD" in isa and (channels % 4) == 0 and kernel_layout == "HWOI": + if "SMLAD" in isa and kernel_layout == "HWOI": strategy.add_implementation( wrap_compute_conv2d(topi.arm_cpu.conv2d_direct_simd), wrap_topi_schedule(topi.arm_cpu.schedule_conv2d_direct_simd), diff --git a/python/tvm/topi/arm_cpu/cortex_m7/conv2d/direct_simd.py b/python/tvm/topi/arm_cpu/cortex_m7/conv2d/direct_simd.py index 988c3a99c059..1842aeca431b 100644 --- a/python/tvm/topi/arm_cpu/cortex_m7/conv2d/direct_simd.py +++ b/python/tvm/topi/arm_cpu/cortex_m7/conv2d/direct_simd.py @@ -112,10 +112,11 @@ def conv2d_direct_simd_compute(cfg, data, kernel, strides, padding, dilation, ou cfg.reduce_axis(in_channels.value), ) - assert in_channels.value % 4 == 0 owo, owi = cfg.define_split("tile_ow", ow, policy="factors", num_outputs=2) cio, cii = cfg.define_split( - "tile_ci", ci, policy="factors", num_outputs=2, filter=lambda x: x.size[-1] % 4 == 0 + "tile_ci", ci, policy="factors", num_outputs=2, + # TODO: check case with in_channels.value % 4 != 0 with AutoTVM + filter=None if cfg.is_fallback else lambda x: x.size[-1] % 4 == 0 ) coo, coi = cfg.define_split("tile_co", co, policy="factors", num_outputs=2) @@ -134,6 +135,11 @@ def conv2d_direct_simd_compute(cfg, data, kernel, strides, padding, dilation, ou cfg.define_knob("auto_unroll_max_step", [0, 2, 4, 8, 16, 32]) cfg.define_knob("unroll_explicit", [0, 1]) + if cfg.is_fallback: + cfg.fallback_split("tile_ow", [-1, out_width.value]) + cfg.fallback_split("tile_ci", [-1, in_channels.value]) + cfg.fallback_split("tile_co", [-1, 
out_channels.value]) + return conv diff --git a/python/tvm/topi/arm_cpu/cortex_m7/micro_kernel/gemm.py b/python/tvm/topi/arm_cpu/cortex_m7/micro_kernel/gemm.py index fb6f7a589525..fbfc3e65bb39 100644 --- a/python/tvm/topi/arm_cpu/cortex_m7/micro_kernel/gemm.py +++ b/python/tvm/topi/arm_cpu/cortex_m7/micro_kernel/gemm.py @@ -46,9 +46,8 @@ def intrin_gemm_MxKxN(M, K, N, in_dtype, out_dtype): K = K.value if isinstance(N, tvm.tir.IntImm): N = N.value - assert K % 4 == 0 # TODO(weberlo, areusch): support more dtypes? - assert in_dtype == "int8" + assert in_dtype == "int8" or in_dtype == "int16" assert out_dtype == "int32" A = te.placeholder((M, K), name="a", dtype=in_dtype) B = te.placeholder((N, K), name="b", dtype=in_dtype) @@ -71,13 +70,14 @@ def intrin_gemm_MxKxN(M, K, N, in_dtype, out_dtype): def intrin_func(ins, outs): aa, bb = ins cc = outs[0] + gemm_func_prefix = "gemm" if in_dtype == "int8" else "gemm16" def _reduce_update(): ib = tvm.tir.ir_builder.create() ib.emit( tvm.tir.call_extern( "int32", - f"gemm_{M}x{K}x{N}_update_{uniq_id}", + f"{gemm_func_prefix}_{M}x{K}x{N}_update_{uniq_id}", aa.access_ptr("r"), bb.access_ptr("r"), cc.access_ptr("w"), @@ -102,7 +102,7 @@ def _body(): ib.emit( tvm.tir.call_extern( "int32", - f"gemm_{M}x{K}x{N}_body_{uniq_id}", + f"{gemm_func_prefix}_{M}x{K}x{N}_body_{uniq_id}", aa.access_ptr("r"), bb.access_ptr("r"), cc.access_ptr("w"), @@ -132,32 +132,270 @@ def gemm_MxKxN_impl(M, K, N, uniq_id): #include #include +#ifdef __cplusplus +extern "C" +#endif +__STATIC_FORCEINLINE int32_t gemm_{M}x{N}_body_rest_{uniq_id}( + int K, + int8_t *aa, int8_t *bb, int32_t *cc, + int A_stride, int B_stride, int C_stride) {{ + int k_base = (K / 4) * 4; + switch ( K % 4 ) {{ + case 1: + for (int i = 0; i < {M}; i++) {{ + for (int j = 0; j < {N}; j++) {{ + int8_t *a_ptr = &aa[i * A_stride + k_base]; + int8_t *b_ptr = &bb[j * B_stride + k_base]; + cc[i * C_stride + j] = (int32_t) a_ptr[0] * (int32_t) b_ptr[0]; + }} + }} + break; + case 2: + for (int 
i = 0; i < {M}; i++) {{ + for (int j = 0; j < {N}; j++) {{ + int8_t *a_ptr = &aa[i * A_stride + k_base]; + int8_t *b_ptr = &bb[j * B_stride + k_base]; + cc[i * C_stride + j] = (int32_t) a_ptr[0] * (int32_t) b_ptr[0] + + (int32_t) a_ptr[1] * (int32_t) b_ptr[1]; + }} + }} + break; + case 3: + for (int i = 0; i < {M}; i++) {{ + for (int j = 0; j < {N}; j++) {{ + int8_t *a_ptr = &aa[i * A_stride + k_base]; + int8_t *b_ptr = &bb[j * B_stride + k_base]; + cc[i * C_stride + j] = (int32_t) a_ptr[0] * (int32_t) b_ptr[0] + + (int32_t) a_ptr[1] * (int32_t) b_ptr[1] + + (int32_t) a_ptr[2] * (int32_t) b_ptr[2]; + }} + }} + break; + }} + return 0; +}} + +#ifdef __cplusplus +extern "C" +#endif +__STATIC_FORCEINLINE int32_t gemm_{M}x{K}x{N}_body_loop_{uniq_id}( + int8_t *aa, int8_t *bb, int32_t *cc, + int A_stride, int B_stride, int C_stride) {{ + for (int i = 0; i < {M}; i++) {{ + for (int j = 0; j < {N}; j++) {{ + int32_t sum = 0; + for (int l = 0; l < {K}; l++) {{ + sum += (int32_t) aa[i*A_stride + l] * (int32_t) bb[j*B_stride + l]; + }} + // NOTE: this is the line where `*_body` differs from `*_update`. here + // we're *setting* the result, instead of accumulating, because we know + // the `i` and `j` itervars span their entire respective axes. 
+ cc[i*C_stride + j] = sum; + }} + }} + return 0; +}} + +#ifdef __cplusplus +extern "C" +#endif __STATIC_FORCEINLINE int32_t gemm_{M}x{K}x{N}_body_{uniq_id}( int8_t *aa, int8_t *bb, int32_t *cc, int A_stride, int B_stride, int C_stride) {{ - int16_t aa_pad[{aa_pad_size}]; int16_t bb_pad[{bb_pad_size}]; + if ( {M} < 16 || {N} < 16 ) + return gemm_{M}x{K}x{N}_body_loop_{uniq_id}(aa, bb, cc, A_stride, B_stride, C_stride); + + for (int i = 0; i < {N}; i++) + for (int j = 0; j < {K} / 4; j++) + read_and_pad(&bb[i*B_stride + j*4], (int32_t*) &bb_pad[i*{K} + j*4], (int32_t*) &bb_pad[i*{K} + j*4 + 2]); + for (int i = 0; i < {M}; i++) {{ - for (int j = 0; j < {K} / 4; j++) {{ - read_and_pad(&aa[i*A_stride + j*4], (int32_t*) &aa_pad[i*{K} + j*4], (int32_t*) &aa_pad[i*{K} + j*4 + 2]); + int16_t aa_pad_line[{K}]; + for (int l = 0; l < {K} / 4; l++) + read_and_pad(&aa[i*A_stride + l*4], (int32_t*) &aa_pad_line[l*4], (int32_t*) &aa_pad_line[l*4 + 2]); + + for (int j = 0; j < {N}; j++) {{ + int32_t *aa_ptr = (int32_t *) aa_pad_line; + int32_t *bb_ptr = (int32_t *) &bb_pad[j*{K}]; + int32_t sum = 0; + for (int l = 0; l < 2 * ({K} / 4); l++) {{ + sum = __SMLAD(*aa_ptr, *bb_ptr, sum); + ++ aa_ptr; ++ bb_ptr; + }} + // NOTE: this is the line where `*_body` differs from `*_update`. here + // we're *setting* the result, instead of accumulating, because we know + // the `i` and `j` itervars span their entire respective axes. 
+ cc[i*C_stride + j] = sum; }} }} - for (int i = 0; i < {N}; i++) {{ - for (int j = 0; j < {K} / 4; j++) {{ + if ( {K} % 4 != 0 ) + gemm_{M}x{N}_body_rest_{uniq_id}({K}, aa, bb, cc, A_stride, B_stride, C_stride); + + return 0; +}} + + +#ifdef __cplusplus +extern "C" +#endif +__STATIC_FORCEINLINE int32_t gemm_{M}x{N}_update_rest_{uniq_id}( + int K, + int8_t *aa, int8_t *bb, int32_t *cc, + int A_stride, int B_stride, int C_stride) {{ + int k_base = (K / 4) * 4; + switch ( K % 4 ) {{ + case 1: + for (int i = 0; i < {M}; i++) {{ + for (int j = 0; j < {N}; j++) {{ + int8_t *a_ptr = &aa[i * A_stride + k_base]; + int8_t *b_ptr = &bb[j * B_stride + k_base]; + cc[i * C_stride + j] += (int32_t) a_ptr[0] * (int32_t) b_ptr[0]; + }} + }} + break; + case 2: + for (int i = 0; i < {M}; i++) {{ + for (int j = 0; j < {N}; j++) {{ + int8_t *a_ptr = &aa[i * A_stride + k_base]; + int8_t *b_ptr = &bb[j * B_stride + k_base]; + cc[i * C_stride + j] += (int32_t) a_ptr[0] * (int32_t) b_ptr[0] + + (int32_t) a_ptr[1] * (int32_t) b_ptr[1]; + }} + }} + break; + case 3: + for (int i = 0; i < {M}; i++) {{ + for (int j = 0; j < {N}; j++) {{ + int8_t *a_ptr = &aa[i * A_stride + k_base]; + int8_t *b_ptr = &bb[j * B_stride + k_base]; + cc[i * C_stride + j] += (int32_t) a_ptr[0] * (int32_t) b_ptr[0] + + (int32_t) a_ptr[1] * (int32_t) b_ptr[1] + + (int32_t) a_ptr[2] * (int32_t) b_ptr[2]; + }} + }} + break; + }} + return 0; +}} + +#ifdef __cplusplus +extern "C" +#endif +__STATIC_FORCEINLINE int32_t gemm_{M}x{K}x{N}_update_loop_{uniq_id}( + int8_t *aa, int8_t *bb, int32_t *cc, + int A_stride, int B_stride, int C_stride) {{ + for (int i = 0; i < {M}; i++) {{ + for (int j = 0; j < {N}; j++) {{ + int32_t sum = 0; + for (int l = 0; l < {K}; l++) {{ + sum += (int32_t) aa[i*A_stride + l] * (int32_t) bb[j*B_stride + l]; + }} + cc[i*C_stride + j] += sum; + }} + }} + return 0; +}} + +#ifdef __cplusplus +extern "C" +#endif +__STATIC_FORCEINLINE int32_t gemm_{M}x{K}x{N}_update_{uniq_id}( + int8_t *aa, int8_t *bb, 
int32_t *cc, + int A_stride, int B_stride, int C_stride) {{ + int16_t bb_pad[{bb_pad_size}]; + + if ( {M} < 16 || {N} < 16 ) + return gemm_{M}x{K}x{N}_update_loop_{uniq_id}(aa, bb, cc, A_stride, B_stride, C_stride); + + for (int i = 0; i < {N}; i++) + for (int j = 0; j < {K} / 4; j++) read_and_pad(&bb[i*B_stride + j*4], (int32_t*) &bb_pad[i*{K} + j*4], (int32_t*) &bb_pad[i*{K} + j*4 + 2]); + + for (int i = 0; i < {M}; i++) {{ + int16_t aa_pad_line[{K}]; + for (int l = 0; l < {K} / 4; l++) + read_and_pad(&aa[i*A_stride + l*4], (int32_t*) &aa_pad_line[l*4], (int32_t*) &aa_pad_line[l*4 + 2]); + + for (int j = 0; j < {N}; j++) {{ + int32_t *aa_ptr = (int32_t *) aa_pad_line; + int32_t *bb_ptr = (int32_t *) &bb_pad[j*{K}]; + int32_t sum = 0; + for (int l = 0; l < 2 * ({K} / 4); l++) {{ + sum = __SMLAD(*aa_ptr, *bb_ptr, sum); + ++ aa_ptr; ++ bb_ptr; + }} + cc[i*C_stride + j] += sum; + }} + }} + + if ( {K} % 4 != 0 ) + gemm_{M}x{N}_update_rest_{uniq_id}({K}, aa, bb, cc, A_stride, B_stride, C_stride); + + return 0; +}} + + + +#ifdef __cplusplus +extern "C" +#endif +__STATIC_FORCEINLINE int32_t gemm16_{M}x{N}_body_rest_{uniq_id}( + int K, + int16_t *aa, int16_t *bb, int32_t *cc, + int A_stride, int B_stride, int C_stride) {{ + int k_base = (K / 2) * 2; + for (int i = 0; i < {M}; i++) {{ + for (int j = 0; j < {N}; j++) {{ + int16_t *a_ptr = &aa[i * A_stride + k_base]; + int16_t *b_ptr = &bb[j * B_stride + k_base]; + cc[i * C_stride + j] = (int32_t) a_ptr[0] * (int32_t) b_ptr[0]; + }} + }} + return 0; +}} + +#ifdef __cplusplus +extern "C" +#endif +__STATIC_FORCEINLINE int32_t gemm16_{M}x{K}x{N}_body_loop_{uniq_id}( + int16_t *aa, int16_t *bb, int32_t *cc, + int A_stride, int B_stride, int C_stride) {{ + for (int i = 0; i < {M}; i++) {{ + for (int j = 0; j < {N}; j++) {{ + int32_t sum = 0; + for (int l = 0; l < {K}; l++) {{ + sum += (int32_t) aa[i*A_stride + l] * (int32_t) bb[j*B_stride + l]; + }} + // NOTE: this is the line where `*_body` differs from `*_update`. 
here + // we're *setting* the result, instead of accumulating, because we know + // the `i` and `j` itervars span their entire respective axes. + cc[i*C_stride + j] = sum; }} }} + return 0; +}} + +#ifdef __cplusplus +extern "C" +#endif +__STATIC_FORCEINLINE int32_t gemm16_{M}x{K}x{N}_body_{uniq_id}( + int16_t *aa, int16_t *bb, int32_t *cc, + int A_stride, int B_stride, int C_stride) {{ + if ( {M} < 2 || {N} < 2 ) + return gemm16_{M}x{K}x{N}_body_loop_{uniq_id}(aa, bb, cc, A_stride, B_stride, C_stride); for (int i = 0; i < {M}; i++) {{ for (int j = 0; j < {N}; j++) {{ + int32_t *aa_ptr = (int32_t *) &aa[i*A_stride]; + int32_t *bb_ptr = (int32_t *) &bb[j*B_stride]; + int32_t sum = 0; for (int l = 0; l < {K} / 2; l++) {{ - sum = __SMLAD( - *((int32_t*) &aa_pad[i*{K} + l*2]), - *((int32_t*) &bb_pad[j*{K} + l*2]), - sum); + sum = __SMLAD(*aa_ptr, *bb_ptr, sum); + ++ aa_ptr; ++ bb_ptr; }} // NOTE: this is the line where `*_body` differs from `*_update`. here // we're *setting* the result, instead of accumulating, because we know @@ -166,46 +404,80 @@ def gemm_MxKxN_impl(M, K, N, uniq_id): }} }} + if ( {K} % 2 != 0 ) + gemm16_{M}x{N}_body_rest_{uniq_id}({K}, aa, bb, cc, A_stride, B_stride, C_stride); + return 0; }} + #ifdef __cplusplus extern "C" #endif -__STATIC_FORCEINLINE int32_t gemm_{M}x{K}x{N}_update_{uniq_id}( - int8_t *aa, int8_t *bb, int32_t *cc, +__STATIC_FORCEINLINE int32_t gemm16_{M}x{N}_update_rest_{uniq_id}( + int K, + int16_t *aa, int16_t *bb, int32_t *cc, int A_stride, int B_stride, int C_stride) {{ - int16_t aa_pad[{aa_pad_size}]; - int16_t bb_pad[{bb_pad_size}]; - + int k_base = (K / 2) * 2; for (int i = 0; i < {M}; i++) {{ - for (int j = 0; j < {K} / 4; j++) {{ - read_and_pad(&aa[i*A_stride + j*4], (int32_t*) &aa_pad[i*{K} + j*4], (int32_t*) &aa_pad[i*{K} + j*4 + 2]); + for (int j = 0; j < {N}; j++) {{ + int16_t *a_ptr = &aa[i * A_stride + k_base]; + int16_t *b_ptr = &bb[j * B_stride + k_base]; + cc[i * C_stride + j] += (int32_t) a_ptr[0] * (int32_t) 
b_ptr[0]; }} }} + return 0; +}} - for (int i = 0; i < {N}; i++) {{ - for (int j = 0; j < {K} / 4; j++) {{ - read_and_pad(&bb[i*B_stride + j*4], (int32_t*) &bb_pad[i*{K} + j*4], (int32_t*) &bb_pad[i*{K} + j*4 + 2]); +#ifdef __cplusplus +extern "C" +#endif +__STATIC_FORCEINLINE int32_t gemm16_{M}x{K}x{N}_update_loop_{uniq_id}( + int16_t *aa, int16_t *bb, int32_t *cc, + int A_stride, int B_stride, int C_stride) {{ + for (int i = 0; i < {M}; i++) {{ + for (int j = 0; j < {N}; j++) {{ + int32_t sum = 0; + for (int l = 0; l < {K}; l++) {{ + sum += (int32_t) aa[i*A_stride + l] * (int32_t) bb[j*B_stride + l]; + }} + cc[i*C_stride + j] += sum; }} }} + return 0; +}} + +#ifdef __cplusplus +extern "C" +#endif +__STATIC_FORCEINLINE int32_t gemm16_{M}x{K}x{N}_update_{uniq_id}( + int16_t *aa, int16_t *bb, int32_t *cc, + int A_stride, int B_stride, int C_stride) {{ + if ( {M} < 2 || {N} < 2 ) + return gemm16_{M}x{K}x{N}_update_loop_{uniq_id}(aa, bb, cc, A_stride, B_stride, C_stride); for (int i = 0; i < {M}; i++) {{ for (int j = 0; j < {N}; j++) {{ + int32_t *aa_ptr = (int32_t *) &aa[i*A_stride]; + int32_t *bb_ptr = (int32_t *) &bb[j*B_stride]; + int32_t sum = 0; for (int l = 0; l < {K} / 2; l++) {{ - sum = __SMLAD( - *((int32_t*) &aa_pad[i*{K} + l*2]), - *((int32_t*) &bb_pad[j*{K} + l*2]), - sum); + sum = __SMLAD(*aa_ptr, *bb_ptr, sum); + ++ aa_ptr; ++ bb_ptr; }} cc[i*C_stride + j] += sum; }} }} + if ( {K} % 2 != 0 ) + gemm16_{M}x{N}_update_rest_{uniq_id}({K}, aa, bb, cc, A_stride, B_stride, C_stride); + return 0; }} + + #ifdef __cplusplus extern "C" #endif diff --git a/tests/micro/testdata/armv7m/digit-2.jpg b/tests/micro/testdata/armv7m/digit-2.jpg new file mode 100644 index 0000000000000000000000000000000000000000..b709a206b8d776215dcaa78643b22fe628b3c43a GIT binary patch literal 572 zcmV-C0>l0P*#F=F5K2Z#MgRc;000310RRC1+Wgv=4-_35A08bV92_7dE+-%&EF&BoC^soAFflYVG#@89JvcHvE;BST|G)qX2ml-a z9036l0RO}Q9{>OW1pxs80RaI300000000010s{mE1_uZU3Jd?l0JRVR0s#X90t5pE 
z1q1{D00Dgg0s{a95d{(Xb($mz{*4NnC+Tr5k)}+G+9pM!N9a z+Fgtq;Ufks4k5G6O=vOe_>A$lT&9{hIqq>&me#H&a?UfhKQ?v%>I1(TC zPo;Q8dSAiM82D?!`n~+#AhvSzq6>Sai_G#i?k&erpO=siPeWdhbMfE9x=i;{+Ft7# z+e`yT6p%#>F!kUap}qMH^{zG#jmxgXeWv)|M1xS$?n4;X*+RSr2LNRA&OU5{I`h)J Keu^lfzyI08i0m@} literal 0 HcmV?d00001 diff --git a/tests/micro/testdata/armv7m/mnist_model_quant.tflite b/tests/micro/testdata/armv7m/mnist_model_quant.tflite new file mode 100644 index 0000000000000000000000000000000000000000..371235edccebfbc9ce7c715a980ddf7a6c39ce9c GIT binary patch literal 29888 zcmXWC2b3GvdFVeF49d)4fI;uQ>;l-{mu1;Ri4-YO%{|Xfv6DDGd3o>T<)3$yJ#mXH z%ZV*pwyZ9ZqDYFpbIIN17B08WE8+M)s;RbJ! zB>4{wx!aImH2A}Y{%S-1u0f{3TmNnIuOI-dbCXcB_7!|wxi{~xZ@nr$2n66SLtc-d z)iP%J-o48g74Iy*n0oge>C}lUtd6D|TO#kH5}dZyP!85{Is~UqQS}cj*EJ}v>ya_!Uph=u`Yb`n)ds1N57GJdHhq1 zee&44xs$!U&zi44GQ48j420`#yD#r8Sd}WyYnvN)Z#~jZ!ZK&PUdz=nOPFQRtkT9Qnf=`_=bCXv;Zd zN{wLysoB9+!}w$8mo(AAH6i}2xY;uWC-enpg~dC0v>5{DaGBa8SRp zK5sNk%RAd)+rFq@e zTkJuu#2sHcZic%(7<1trg*tTMHSo!;D{WZrIS;cqZ=&*A&#aE$G#}^W4{ba%2*sJ) z{MR(s?|B{;7LWY;>CB$+xy0PWs-*YTX-V$lboY~rX*(%m=$O4TOXpcAg@fzd2-O&HF!Q}??jk<3(=A}3K+-#J;*_iKQ zLx0rJUu%44Q={$G#yrnA%D+EQJH0I0D3$@+Du0o~ zByPZoPDB%-tkMMxZP5rGy*?=S`N2avzg$}HN$h~#p_;fJr$dk~uvJIN{>R0rV2Xc` zELMY51tp~`LScp(5K)SZsx3hA2BpPm!V60HQI%(LeGBtzWP4Q0UVkXY>#b>M-Os6z z0~9uSkmAmXc*>f1Ssq)HCygckobW^hfzuHwu8#4txnSVw8PQe~r!30-5Wbim|E6xK zWn1%c^Y#Cm`fhCK(A_WW^R#~abv0)jMRk!pUhEfCIOVFj<*A0k3)Wgn_M0I!?u5%6 zGR8{Ac`2z9OC}?Pj{ve9M-fX__cr4Nj`zG@91oN=nvZmDt+%!C+7mHXS`D5aO|u3v zH-}iY`v$S7kXKgFn8?pBVWlW25b#1ZKpcroOw3hT^l}c%hr9w01j@_0*1CGgG}?IxY(0}y79k4)v@DKnt$fB$eN}*f3;JsVG5o-f!UG>j}(m| z<3ko!fmV3nW5lu~jRHNUT0?AQrk5>_cChu+MwK2km|Nk;x3m;mtowC6Dfrr{PE&K~ z2?Zsa&b=sR_F3Ft+vvQLKK1M*i?fy5S1U4lr~+oaqu)?0f1A^MxKxv&%nUuQ%-qRM z!Qs|WoY~&APQz&XKk9hwd(gIA2b9gZht-fcUYA!{%$9cSNM8Mhi%S(B?jR3Mn9fOt zE0;QtynH6#{425$V`SMup>m{ndVL#K&z(L8-zi^w_$n{BnSD}xZ`}gTvs@4oFEPc; z3v6w`mI`0RSF_UYZmM=TxWLHfR%{Pu9ufnuEJYaT#Zl>6Y1_UK-*)DFFN46heD`lI 
z&n`^3WV5NzDmNcr>3b|IZ9a4w<=4fG+*?aCpOZH3W82l#{^{x%aaOg?Oy}=CECmiC z`Rz*xv=??2d9?#4++X#|UTqc7`=4t+#PjzC;NApWx5gDq2ci}oN2xFhF92lKsB?h{ zAQiQ`D!GkrFK8F%L`EF%Gl!2dM!eW*{tNPUDh-_H&qF&!Y1hFousV?D+i_taA~lsz z{gMFiGUe4Pu5`XSx9!T^i>D=8=_oBFWt)sWlw6GSBsI>DQL`{{E7dlS#AWo{tGR;9 z8~3%ute}>BCC?r(I-Nnl$fOg{nK^pJ=}}KB*BOJn-WQ)jINj5Aretm9c2r_75UPw|{jsNSSuXO2bk@Ozr5y7SHbtI98ST{#DS$^#gZKE^62^I6z=izJ2)kmIl zftN3Aw8H$`p5t-hbG){HVoNDPIipU`!8uw!E5@^AgdIio02KD8U=$;g94e1m6443; zo+yza>2#Q&vM6Cnpgf8!@Ypdlp2af7Dz%=&tI_}+*5hWT7=D}&#sli%TGgF`F(!jk zFZq~Zo)^TY+&EOur4d>d^+Bv8LTB)53=zc7@>y7&Dr8_ps?25-NjSl#)$=iW6~S?Y z$A%ySAlT@o(V(Oqkk+O31Waj)YD6%z#-(U@RQ{TmOW`JD%vCCp;U$n2MyU{T`&T(s zYNmqJ0aC|+X?Xyr7is}Yg@(BI%2>r+qSA0A4SCDeZL*SKQS)Sxzeg{olWCoxtg>Yb znGBP2r1kg-cDrEUTN-&AYWk-VO6s}%fmDa;c*U5~Q9;skq_tUW>oOEvt(8Pu$#%1{ zBRde0=z$T18xSVE0;noscVy~;lCr?{(=XqrhsE*a0nqpFP;9s6&=M70xzzX(1_6Ct zr|99}db3dABz-EIa+8zbWEOnfYC*wZ(sxy~VuZiu;S$xz`mQ{2ajl%wai>MWovPg) z;w5YG_>9;WUj@VNA-pQ|izy>%e%_*HX*oj{fO6M5&XZ8QraOw%Q|ClO2J1z=5)CM1dPe?jft#g&~H++%~ z?y3Bu+VjmttBF^G={p!xbTm}Ks34B>d!#%@kXKW(<7+Io9U*oGPz!?(sY6L1rc5%? zZV_HD$A~N@qpqtNK*E<*CUQoeJi~_h9%zZp5PDM_I=mI8(U`PO2&~e=0L&tx6U?H_ zJCLDZ;L>E3Zq=sAD6$tOsT!!PD~Rh44W%3uldY3O4@-3E-h5C!1*TZ|{1hwvYmB1A^pO@=jKcIK&zI-6y+6IbQDoaqF*R>{y4x@ABnp|mheqzu!LxT`@Bd{r9C^cQN5Fa(Iv%+n! 
zD%-O?=iy*5!H(vG6?OcoLF(G+7&X-RSeNRDG%&ejUDNPtjid8%p_J5atxLHJF(zKC z^b2{#;YyuZq9={Wf{0o5C)h5zH)0hA8)t_}daU_q>1@4#cOKc%C~F}ZN{Qz?sSXyI z7Bd(NwVXsPDYN*3N;D0yo6))+%D^0KJt2+9Awrr1A)p$nQnSY520I*+h$*P6SsYHU zc0e#!!>ks#57|^O%O9Z!v9}cD3vA+(3X)y2ZYmSf-uXb`h2nic z?GZ>n;ZGbO)@;C`*?rgUr(@f{*siSKy7V4(T{v?dmiAwJuJ)PZY-jh74Mi#U$Bv%= z-P?k;(Bsc`CeHr5`=i@T;Ya6YF8{~aXY7^f4sGuwb-vp4h5UZ(xpkxaSC@Xg^xgyi zep5mSEQY>#zo7rh+%Hz)iO3zrCpqz(HS%$gxN)FX2F~r`-+SVoxcl5@j*=jh#|>A@%1c6rOcPbSKEz0hiO7w%B>8ciy$5Gc{a zf2yXn(TiQp+FJx^VPLU}dg};%?HdP&L|!#FpB+*+acZ{v{mzcW1T5`}^)0Kzo~oZG z5JkxKX6lMqoHltLyhB}O;R#Vw^-=zO@WsV+wCM;n^#X&eB-Dlc{`?&$rWAHg;?KOu zs9TC1`AvbpNwE1x88u^ zMTOKMrCSGBFoMuIJRjey7uV!pozy>i{t99Ke&xF%Q+Xu})bvdim38I)%@(+=KJHv10^jbg(#cIHzV{nwJP6yqbm)9yb3J)dWM zSFmOKRlCX6R9R>bb{jvMbH}#B4~oZjZuBX4Pd_46Nz0L+LbG<<;#k{O~3uc)6U}68+PfCpL;2}>wjNWz7aVEYx{i4dq>vJ zt~|qS(I0&uJ9lM-mU*`up~P;RZ$M_fua;f&;||5I#lLXBoqE`G?6cUT4-fvjP`-Fn^^@t(mC(wT z-awa_15aCJ{C6&QeUM|YH(Z}nC|8Y(;twV?fl?~9+}>m5*M|HLO5hND1sB${q;CZ*}N9W%YFbobJ=8+b9x2J%d9 zPE@Q!wLB^$NX8*Yof47NDh@t1j6B3qlFO#=30we8sgC>UjB*D`N#}_z1SwVOxinSE zO!1)_qoVcVxFDY01qzvJVVjZXRN;De^Z{imdkmn34;QRD7QLxt$|Eg2EfWESO-oGv z+zPF@CzsVbNUfmO3w6Z0SsGoN6c88a5)q4ArRi`#omt~k2_<)+s=V)1IiB~vN~&ON z2!DQa^eg+K%il{@aRCj|6Z`3LI@*k-H6;sC4)XGl*DeCQy^~Fne^;tVdN-NOPpgDNchoBUt%_kf4zx=G*3F*A@q$_;y~KC)XW3Z-ot0 zC+c1Gl#dcUL+2!d%!PqiQ9c&4ZBn-sPIq-F2S*ed#fGe&c2IGy&0nJAqhZ)Z$@>V1 z`O?^nVQutIsjV*v7dy6kifSUx=?j4A{zTFe(sQeOY0iRomt=IS`wyHVMP3MSW<+E3px1G03Isk`Se0EEICR4;gTwP`_I+`ob(M z{n|bm7XI)w$4DGB3G43}dU+0|sDJ5pl&xAe^xlx3JV0w^g}UqdVS`uRw1}hEd(*fs z$d>36=_Y)<0~U&Uwf84N)Wy|pwn{8dKV|Lsc6`=KbBEnsi?mbDtMu2`tiTA~6 z%)7$c!H5JqP!cEV=2`*T9pIbjy!8pfJ68RVN?>B-!F9iFd2a(-&lO=_z4$qG52)W+ zc9M=UC}}r&- z;dlXbbAK>UtvaNIaJ95Ov6uIVTmt-ojT2^U1fNof&KNGrh1aTLMNbx1yHRsc>N~Pm z$jbo=Q6?k5 zAzIvC%?s)AXfeH!3IU~hPFk2tQR@>l7oS7;5*}~7=#(OBcyyN24I zXTLa5iSows3)!-GzskN~-KBMCm?P) z3q!j*O99gw9vxrPYYe&VphBowuIGK;*~ogXi7CXUr^wmHxt%oZ*2>}KJq4x2OXo_r zx8b~yo05y_n;(@qO-V43-44|J9&sCNGB~Q84}5r~hLzYQ?BQs3N?B@G=3OSTYqqe( zE-LeJ@?znQ 
zkCyNGZnM>XK2>u^FnlkyJ@duZbvDtP2Z1`*sY!&>T9Z1dweJLtDLFEaKs9G(YlN~%(Dfl!vU`1tGem2nJmb0m+V6hzh^t$A zZVg||RU+QUTvw9DEIVn6#U-L@+VP)_RhHr|HWP^Pk5abska8%$L+}%GQM_7DGn*h5 z(m5SNLqUqZmL@Y)O26L2GFIhvdr)j%5lqqr&CJ;M-QCxDzB}?oJol@630t_%z1jY7 zq&IhjWTHs-)qR z;)!BCtnKhxQ+rM#Z%?&K87z~4WbeiQS~(NL3v@%w!JeYq29Tel(VEg+ZupF0Q!=30G2}Etg5& z>Vg8I!JqA*$n5}4O>R|O{2Wp6tx{aFi%8UB*I#94r}=C`y2Q*~D@jta0FTWnjw)1b z(BX83`?6Av$Ss#*-n=Ell4bT3jG>Xk>ljp~^d+zg)FkUibhvacoggUN0IlRYHMDQ& zG!6L0lvOp$lyKF%!&-EjaUgE}B5uCF2fYmi>^I}?CC4=~w(8&8(AoIW&Cq*V_ic}` zNP1U(>snsAWPiF*trx1Gyx52FS@rT9KLcR3ar_e*6d`9_N$Bf8=P zi~U?j5UrPMzdTlBI0*qvjk9M|MheA_JHy)sHOPLNMv+3CmnF7VWKg^`TRm)V)uAp9 zk=9vIa298pMU=9GTvhI8KJjcO`Q8Xl@ii>cI@fQGA^%JOmv?}j1@4u(pZbwib%qE{6@(>B$et{f$4 z43a}+OsK5dp2Fo`7B2Q$0A=(@?gAxjP)${%$=#%lIaDr0Yc%guumD88>@w(9XBGrp zNn)FwV$nUpOshmx=7)9-jhp$J;H_v~IkRvwdE&%?@XG;l3$Vr7DPo z*~Wuhev-r(5j@;Zw1^l%3g3Bb7Ca`0EPA2T)3yS484IBRWyPE|s%tP-6lEDcM$qVP z*7ZbDzv*p;yh^Edz;Pjk&d87s0WagMl_R=`H8BZGavh#L?+~Gdj1l=Nrq85re9*Mq z@~#0iEowP!{USNZS+F0G;}T+hKT_cX&9~yMN=2=Z#h;Dq32|1Lta5NkK-*81T2XbU*3?X8HJ$)S>TU|QRi7Jry;Y^5 zH?Nw4*3Q|%@WH&Z2`Y4RM9>K*rNjs)NA{N$d!hi1l@`%MC{3{db)cnsaDI-t$4N>R z#HAeK%pm*sRGTUX4--n&wwJYY;)mNM!I6OQ?$qW-UTa8eTexd#Hjo2w*N!MRZJ>jd zduQEes}{#o?EJd@V)mgX*EO5DBK>VUeKs2_U%u^p(xl>`AIHS3jn5vkrYrLk&37_b zWaENB>AEtk63<)ab9i(L)u5&av%3_u%$nvuaZ@BC9^$jnsr=PB58r*O>xPUmF|$|r zoV)|sZZv(E`49xPztL?PoL9ngPS7sJ=4ZF&BHBgUxqPdJ=U>~Nd$3a5yfUw?oC6(_ z2f#yc>QFpAxwAB%ymv%ENard%beSJl~!TzOi$MlCaC%tG;a@v{rm{-xj|V9!tnB=e$hNn3EE9L2|Vu zU(9>4O?qJ+5700Bvk|pEeDSG&0oau{6)GE0wX!LnN{R~9Fc`<+J*NN-Acr0F>^8kQU*Jm~_9MHL6XNa7}338LfsHxK<>p zdp8hY+Ov-?K}s9aO0lidxTUH@-pQ$4lpg%M>HP5CRK|9qO~d zIIl+UIxJQZW#nCbAT*Fyhd>V3J8m)O~@3(^D`mfio}Vgb^wYkIekl%$uiLl z67XPF0NOsg8)lvwHm-96f6}_^x?As=@P!Ybi-t=Bsz?Q7N>jT=ZQp?|?Q`l*0D~)H zD__zVt)!V7mQsi*s-@%yrFut-uRP$05}a)Lm9g=${Yj-*=GW=jY!geD=W+c$n(@x$ zeB@SrbJ`4S>RvmyN-Z$!d4|`6^oTERv#vi>P z?3h%9KRj0Z#Cm+>IJbN~u-Lop^ljY!oj1D$j|6M`SG#-PyRu5T_|cP|-_Byl?|AIOR$zE6 
zb+@TF+Vr~%?K_FS)!I$h#~)o(^!{%Avrm2{*J-IJOy!#^Qm_eQnb`swpIL7_6^UxK zBmkMiEDiq5UM~$9Lu~?RR8mT5NMP82OVhFHk5Z&Mj7@cNAbDw$GBa5={9OkBC zhvmw6jd4nO;F4N-UKBbkwb#om?mDf>SM%b$RYIIDR0AwNMYVqI=@pilQNo!yMk5vv zS8uREsc z*kh-2t21$q+8Sk4KHRo)iuH|4YBOr%l_@rot!}FZ08x6h?s+!t7jD`pFj}ZT?(ON_ zl1GXYluU$-rLv!?=O(@A=tlM@Tk?#pZhtC_*j#d$pK>e}qzQn~ zIt4v{Z&TH7p4Q`%p%Vv`UkEAN-Fc~?$%&b2gtX|Daw&yFis9vQOO{*HAE#(mzpR!E z{)0Rh3j$qy_X(2Q{FOsr{yd%uX4WOms%lw1%K)a9hJUuenx<8z)?qs~;?p)|$(g)p z)<3sl=;j|LhzcaeI9?qfV#`x`VfiSJ2Y(8MQ;I!9pun%@B&@>%LscXQ9<8c-8WD+j zM=Ata3$UXg;SW9zBs_2bW@Xg+uo0Y})Z|o8+^H5^eTkymurrp_c&oUD_J!BLYAGq{ zP6(TW0Z9@Q)F0EJQ9&?f+mfyTGH)*}w!RiZJk3y{#7t(PFYPn&yKY`R>|yK#XG*NS zCnAC7A$W@w$(AASS;{VN3_~9Jp|grfvP4S`t=3Tw{ju_L8z2FB?*TBSrq1}=#yd?j^{!YHyWV2Z8W=DyQ~Ddh^-&w z<9Qoun4<_ASzJsl$hA@Pm}HhyA0+YJeG{cBcaO|v#oI3W?l(8uQFlC{ZqHholus4- zScG5aN*(T9!F}jCzW#817#rK{;&!X$y9d?|{rLXf-XB>py7Qz$VoIIiMS9PEJEIPQ zwtejwCsnIGX@`_R!r3gQK9kKu z-|dfp|9H0U%n1xi)m`lCZJ~T~xl00A>Ab-YDuYE1 zR#0M^gcFRx!BRF7TzhkAXoj~5pD{ZjpgitFd9=?@Db zO2w(h&rk-c%s_b{<+`?LkY|pE1~=sUbWx_}sN0+?PHpV1$#*TaW-d_n&`@b&V6}D8 zp38@@mV%@+>}OLVQX!_phr0>m2?(>6BR{{iuIT4?PWo?Zs$;uqEF5V>PMZo5Lth;9S&+joTW31 z$G#Ic-*yO-xwS;m({cBi$vcPIhd=w$MVz{UeAse77<|O}ynJ#NBj@h=<8C%C&ODf;VnwB(;nsrNfohQ`!2L8&jK`(t z6g`R>6VBoEbTVI|q}e3OYGleG=q9U-y5g=V7E_=$0lRKV9zZ&SE$jRFf)cMF>0S?J z2|5wv%l32xO(A1IP1ZIJ!OY<6a3yG_!L%yK+9PCT@L5nOW$UO#dnUal41-v*dsUcv=rA)gr_FnTJNp+YVI+MrVLg+1^av0+Ov*B`~QHKodLpKjS~TmJgi zsYnY~L#_Y4U`FXiGC&Y6HC3hDveuu^Gc^u8f7H28?M-lw>rGV8zFbZPN0mx&-Xfm2Y*gbFQ~iSEO$n zXf&~Cg0}-p;YgUp+LST|64dowci{0RqYjJQjN}IGQNtHNDJF};dc3#Bp@V`TSDbG` zd}URRthJx`hN$Dv2X}g?r|*8ZX+wM_(aYe=ztzPYel|B{jvg`BOc+n{%%L`3YPkyx z_kv-YIs5s_jq>K5dx~Qc24b5GxX=@MHiL<+@9pzE*MR{+ou5qjPmYC6vpi7r96pHU zn~g+}a}ZR-0)Dapv3yVyE1xKLF(8_bu_mVm)R+!yR_8MnqLjiYgFSuB z#AYR4R5N!@CA|6#Tivrr5zIcdfA#F8go_?z=_JV_LNlvdNgK+yilqlY;R-8E(nFjG zU}g-2^r<+L0n%yaQW>fUd3i&zGuO>o-Dw1do-6vO^KS+1oC|-6Mssg^d`VB|)iar{ zD)jhX8*^s%U)Z@4Tm5|BVST9Hlk0RbbTUvYF!Mw;t!`+FgJ%tcQf?ed7l}ZYxvd_d 
z3D(PpN^#lvz3k1}b&e}Yt6CU$HcM$l~%S+zRzefkS6EjCu4p{H|2nSb`` zS2k6x0$3es{n|!#jkOT=eK$iv6i-6;SO?c!ddHjARZJDCJF6|RqL`(}HU{VM9#(uG zWMO<|GnEuX%>BNqMnsAVJ{5%nJLuEt6oV`oa}hr~@+Et$)ZdFsK_HwdGLL#aGR(5l zmX__xH+IU?I^))~O?dmfS{gSmTKUkQg_5d9wD}<4sz0R_L8KhZgMOX^gV#?>q@cwr z{KeD0pqZB*_*#D3*)(4|^YoB46?kd`0N7zW9JT=rEl+qhHyOmN|CU~4qzc31ttp*nY6Jb~Tbc=0+RR#rW`d@~9P%^s~ z2=AH%vz(}LnrbU12}isKW(-RLEwt6D+1AJWG)w zB}Xpcz8B-!KdT@f=Yk7@mb{zX4dZ-tHMR&6<25?--a6!6_|dsE9Lk(BMT70>{ z4%wXWmx-q&U)IiU4tv&f1#>7S^ntdUQg@vn)+?hLRuHFE__5WIB&8cpp|*KLIqo66AI9xqB{y~MLhISF8R zZhb(;da18;(AE66(mt`N`*k_Q8K_IY4GVJ=A)L{`B0*ZfVp^_MjBuP~r>VH1U0G4f zNbf^RzS0vll}cI7X6r(~u2b-^DO*oL`&7A zs!+6uMyrkcAz?{Pw8Id?1G`|+Hnm)~&x&b+Ld9F4VRKnK;ZD`@`gqN5_h)vj(r0{s za#x=fG!M45pj37y=ye`n4@SaIl&Gg3d%DQGWcygi`JxU4Ll^ zORVN+zHCwzYDEkwQDarQL=vskcIG~s+^O;yJ0HndV5zmcP}ys{9-Hj+>@xbE8$S;Y zfeRX?X5TU^FwhSjsUPbSHR8nVud4ynOkuH$#bt9} zwpT9UsklC)3+KpPof5@y30wP3~*DN*ijI$KZL~-z&C7yqw&^ zW!|5xEMPZeTJ{Nn*y5F%1aPm6(%F?}nOTy~qNU5|@1!BEeT_dM0kb&x&peqpA0bl%I4LVvPr_I1ucaL7}|**uU%D?s(kYL;H}c=n>$?duv>R)Jlu<0iYy zzp7NkRt1szHjH7>fIOIaHrvB3x;t|0EdAG}^@z!W$8`)w8nZ?i@+{}!ocF0q9a(v? zFwaWtaIbiYXf;}@^bKn}I^B)eZWtebf~^~}8y@kKtd*;D?5j}I8XuTGUD%egB&}R) zl}{;{U-f}JjU9o-EoN0p`V4_$1B$=%XFZy|%y&vDF+#TlAlxk2WtxsLXqsxKfEjse z7@Nxny$WNLgJ(>3SY0A4jaxY=1i z{jEYYo|+++bh35*^~0XZ(#bfFsm1KWp1^^dvmI(6sz2;e=(koJtDN>GM;i=L8^5h2 z25YQ5?3AKFg@WbvnNO3ClE9j?Vvp@aR%{9Kb4h7ts}#8(+Z)}2pDq)TC$_FI?bFx{ z(~TLmW2``R6L?5lGpt`by9(@K$>7dX&OM~Lk06VZL86{stzfKW2>4S7dpsZUT>iCv zxwduz&GA{2?uSv0Gc)m}BewC>ln~kM_0Xj|Pj-PVzgm$%XM}?k-pb|b3g_wSJ;sIL z+S9m5zMD*q?XDQ4nX}Vt@yO>ZkBSjDwd$MCE%@K|ezL=gl2pabh zqX<_)&3rz}L(JM32a%IlQzs#6=XE?nM`b0SwceM9!VtskQ8WUDL+%VudkktMSlCa! 
z-y?3 zIr4e-^mXmL&NjcsIr%|SXvo@@=*neIvT2NW7f$GTKxNaHr-)K~1HSE4A8fpn*k*g6 z!j;olrhQ?EGZbg0--jN*G@o6Ad}oiDhG>sLGz>*W%|#Azsz!Yyms=hya4GaSL(|Gcb2}D2JC%isst8OqCZJV*QCH1XvN%(VR*(^r zQY*I9K?Df`Jf6Jq_C$mqs*&Pqn3dqL;?yFtQ(_{JYdF0WV1Xjx(po9^gbr^wE^M-l}e@5$Y9}+gpfwli}^JoMq$@7fEbhH zIeNqZ6}6&FnaoL>Y8Zo8ElGh$8sa8Vw8Z{>8g>R|Q+R7|k;BmeELELOXV=nrQeC8B zjl~awd6yFrq7k4NU{RRn;IplYj-;Vel~H5HSF4egKH;D_fje5gg7nFFiy@_NvWjOV zQj9dZDjily2Ir%h^G90AJCdCaG8cHQ=+$p;Y{tbWvl;Ou0VKq$LUroI!7ul)l2n>4 zsW8!2^G1zJTL*(lm{0Rs>!#*%$PR~7PpVTEw5C91nlGlo#9p8?`%u*BgvsI$Dyfq5 zNqH<(=-gXr*$H+H_Zf*gLut?=?YBIHJiAB_}Ya}uqCO`N8g?{eo&iCTT0ruXvM;S1w2}^)HBFJ-c44NmIs)Z>qTqGJ^9U3EF z;pYlN*%YP^GUY}dsL7aWoGir8Fjdm(8IsV4b}Zh!M_e;K0&Zs>q>^6l-qnwo2V(Ea0bWsKt_IB(eYn?e_yQ_t)x#srcN zUtnxLVbaMrJapTomd69uA3S?z{t!5S&I!G*9}`o4`f?~A`|0H5yT9t#7}v0RK6;X< z{N}~-zug;|dgY9xsJ{qDyvjlBa=w#Ux>ZfG`jT|RyJ1Dg8Z zO0Sf!f2zw%Klps^)EmCXue|whIDc^|Y4E(2)l{c$cWo#|N#e)5mY==(yIb#^e2vfd zBV+L5k1rHQ5(Uj?_`CP*yu0PA%4=IcJn`lPG-gRvdtZHdy4U<8C_Mv?XrwQ;VdpD9 zz1y$RdT;#O73Ylil;r$r-veO!@!90HFP0Ae;FYxc%w(s2e%hZ>o&KaHGW^jDcIol( zr^fr&kF75M;z-*;7J$`z^9K+`R|cjPPH(^eypGIpwpCX6EUwl@p5{c}-2^;c2*j zzL2HCcIj$LO~70sI6$qoOVLFY`uzvB-AqZluI@|Ga)K}xe#yJ7vC2=71+1Av6s$Kf5VNeeUeg6YA(vALpwI~KLhdX z|KHwshR0Q04R7zgH*L|X_h#*tu8Ul;v21Kp10>jBQyoLF$(N89%#u(-H{F;TFc@QF z4DQ`lvt;$&R?h|B=GrYLu4Rx>=37?ZH2XHNaCl3 zsdg6G6HvO&4TcTSJ~$pCO zQ7XmoEt}+03iA?PhuSr*Xv*woTf5_sh(RO%tsq0$Ev{RT<@c5(i^L_ePF(3CZDwvE z<+n9e;mSO_ODvc!r%bGnW@Xv2tqB&d0^%_hEKwjEub`;O($*N&ha?%9*<=hyoTa4q zF)+7AC_>(m>kV>3Hv6PCoOHf*nJzymw4b#P_Y9@zX?+vlI$E-rh}fr{EbPMgJIq!= zD{D06@tz`dnmEIalLsy%?L?RB6k(`GFw%b9ba1MT)9XtgzTD3fXhy$itLO9HwY2uu zd%C_Q!jYCNa&%I9Qo28(Nbr{s)Q*ZCUxrB)UMbGzaKC8!{OzLmwE|jHbs#OtFp13* z8h148U6n;BYq5{o`)!}e;_pdgET1qjOzZDVpV{rBI^98uDC8VS6ssay$_#s^q@OS> zd|O+k%3~}QMn6w)SuEu4NiQU_sz2-;F&|5nsBXwl>6L|kW)43!{&wbhQ{lyvST`*u4QLNz|uHls0ZMBru8*arh*cFmf{6qDE8%yLh-MXPUk@eL(Jv=EUsL z{$Qm#m4Ois40J2FLb*$8nXEij*Q0xnM@@_5;|iTB>Oa1zx|q{mByJ`tdVRj~X}XB~ zEskoe#18O1g$XYIyOkB5Ktq=6(|Pz4P7S-!Fq!i%z0L&1?b9QDO=gm*h!MqGy|zFY 
zyyB~xa)#SQUan<|PW01+Rk2dzu~U5&nmmc}q>gZzk-e@Vtj&))sIlaTI9^Giko!tv z#jhMzHc7mhHsTaPI$Alc5)aj1%0anv#lQyX~jJS9SqYwP3 zI-_-PIp01-@fAwo8$(FvrwrC2M$X2^LhO|MgwURwpI@4L`*GeWe1u7_OO4`>IP+(c zXD7a!M=Fz5k=ctU@r^~cC9WniW74BDO7giz7Jk~VRTJ=K2y4uS`EIY0uUj!D717ch zi`)gd{gmOw-D*XI;HH(CV>w6~{Q|BqpL>FodAgzKa0Z@s84?uqvF_z(;^TBybGN^4 z8C8>*`6MrDrm*Smtip_up3r#kVM@e5Zz?)})On%!Sh0T)p(dHigesBlbw%(Ll82EH z>rn|lFgh{esiz!wiotk|F>Aoe_|HWv7EC#O2IhbPEY z+WOIeyCy7L#m_Z&cJ_~Wy`&->E-p4$IVw%m5V1`(AL>(cr9okeFXTH*{nAVC&nt*E*v zukTPHLml9S6xJZCk>9EQ);~SX+TdWdpNWO~mMWwyi~b-VKNB5M6ZxVM?2Iv_NNKW+ ztU*TBk-g(mSz%&|uPUT?+85H(tC~v>ReM8W;+O83Mq%|xhxLp}!c&V$-;UWXr5EAr z%}&QSOJwFcM3nmdrp_;75@Lb1Q6=cMC+$ufdpfMfUt(WxLXX=@BOF@Y!fcHju5RZycNR82Kj-n_urOeM~8e^+4 zlgwbAtjwC~cceEAd@veUJ6423g@Q?fA~B&3<+*6K1U;*}Vtw8UNmkpr^7tQ{Dp)+{ z%7uCTA?ebJ0O5h|x(E^Lwt~xJV@ycpjg2UH0&TZ}K!{!EvL$WNp{{~_3|Wv64<_)- zBLW#3U@KR5NnjP+6Ear1!afhTT7Z$`Pn{aNav0M=8V34S-1nvU0NQGAq=XFkZ zv{<#%CkApj*H7Kr7p(A)4UAbbrKg}VYto73~SoL zaKuy5s)+V=ep;-*VN0edO*9yL#8FN|}}f_tT+Mxrjor0M`|7={0)5 zSN|;-5+dwf8_FvunHYh{wCOA@Mi&MGwev6uA}LrfTE)wqSiRLVZv?MwOT;o+~p zL0n-G+enz=U5LzRN%GP{=H36l0%MWHgP~dLkmdjUwJ>)Y`ir3I>|)N zqQs`ZJL}3UFA0<;?G4zzga=O?HnD6NOX@Oq-kqkYqE0PKF5o=CjPOW(^mBgjh;N;6(TTYxzoU(1i(ykS7d_&o;zs?xA=j}y zUo#1J%*DPkLQ7%}Vd{qpT_Z`t#ggeVJ;`oo<4B33z`{7Yfv`H$nZdW`e^<9_X|9^5 zEu1;sf}LsR+5wRvH}?{ zsYJwvbFiZd2)x0;=HZ)42hs)t!z}v%#z^#@qe!cquChC>=rfGe1#x>wVAaNO!t{k? 
z#)yc?<;F?4bp%gV-0J07(xtp%3oUA~Mx-+GWyC7N@$n;hzC7WT@tlcagIR=S+6=r2 zdC}8WEOG52H729~-u;@o7=ts2jIkM)wR_Wg)I&WpBK~rJse2_c+A>xno~Grykw8im zC^&0Siv9ky^kC^3nNt2~dIy0hYNqk{)z0q42y{h0P8lEOiEhp#$m-WC#*Sy6#%D#{ zsvy1)7h)H2+eQYdMeZ{1RbN0)0e3)8I}xM=_zRjbBFKsm@L4p2AaWamgxMI(U4GzK z2&9)-5M(oO?sX&ZPqY}&HV+^OANUdH=MwHSMB12)TJe8bUAj`9)^mKKhg4p5`jGht=$ldl1cln_86&_A!>`4=47aH%9>Jfrwi#;8ZMnz+KGs+mRpYT8)X8_GR|j3>)Fq@F50Op|b1nNfCK#z=`UXSle~NOcbxsNU6D;8W>v#8w=t zE^b!!$1Ce)q)J4VaLSl0df`MWHE0Zo-Sc_YtnJ>8Dc`D&m>g2HDp$ zD`L=wxMC@tWPY>n(bJjptk}%JO6eDVwca(vrk^3XnR7fiCt z5i!2R!pBTRlrclbRGiTt7Dh*89>lH{=c*_^LRv_*Nu6#j?>DoJ3t(Oc8pY&Xp)iY}d({SnWP`nW!K`#HP~R#-X@OV33pj7)x)z zfAa2lJGZUmV)T6HrMjJnNmGHp5F67VN@Pjq0-JKN8`;Q>QOHl4~*rYJAje+7xwBEL-D~vv>^Y zSFRx^)pJ}wFESw{tavB=Fok?c%$!g}9lm0wnp0Sj8z?=*AI%mZrK^?;GZ&mAdlk2Z zGcE+mE8JtJ96S?vnTS&?rG5~*IitIFo}xHkJY$xx)i4%iV!I}w6Nv%nDBl~rt$(od z)4eS_k3Eq5XiK7Xs`;Sn_9l{T5yhCL`GbKLRq)pM<$^A9%LFDmp+$JFn{96CQHeJx zFVA=>+dYu&K)Q5Jd2&w%XF3N@^@*j!f_!6IF4KtTMmf89&M+@lq^*?m376ELTe9%& z3!+%A=+)FTfl99L(FS|5`n)b~{bXAJo5d%5X+8ipUA=CWx06Mz(p2Gjs`FwhqP5%N z3~y1S<+`=X(iHQ5pxKxJKsb;`dcS8JAUKXF(nPC{d@K(=RXRNAlPo>p| z(aJfmg+_!iH5((4-GOfneuB-&$=n#eTN|GC z3pxXqMH5j$MCc{D3XX^gj_TnrDQ??dez-bRHIoSDWk^D-gC!2`f`%`HUg0#IyKJfq zTq)^e-QD=56|TZYPc(6Az)^o@RM||l8wukB6CIaZ%X%@QrPZx&?gdjT?F;5*uR6>M zWA)^?J+D1HkC;tUe#+-bbwO%nfilH=EmM%L#!QMBcvpH*>r4y8-ELv@3bwD<&K;BZ zM`fx2-x?#R{plsRf+dyHqQH$QX)W7Ick;AUzCt}!MDb+g`6UiTB;Dw#T1U;w#qRk3~|9sTS${4hiYLCXA{v@3)~xa;Y~8*^m1TLQIrzC-BFCEb=tNREk0K62O+J-J7<<6lhXn&qT&F}z$P z!sOo6;@OuTuBe~#o&>I?jt+IdWz@g$@jFjvQKZw6`74*nl%u4XGp+xa_%i1TpL=B9 z$=45_y|Rzgb7uGJd#I}{)|-)BtT1MtxzKCq2u}5zIr0M;#7j(^Jv@c=2QV|S@K^QN zFSCb>4XwQPn5kq!aK>dA#IzpHw3p&mr`@JsNY55e)k#^+l#b+R9g{S{z1b+H6*#2C zdDy+0lC-qd>|?h!8hWNpGY3446U7twv~R{3$`i6yUS#qLc069j6$!gJSCnO=2w@;@ zNnFnxF!C7m6Kv!qs&2L9M%oj{mx1?PDhu_uMN z)2w>6Iapz{5~aO8BwN_SdLoE%tWj`C-OgOxQXkHzr0SF7oB~>HbxzSn-dhZbaUD{0 zkMC6Zkc#y!OOu$4yGQLEkr2MEj&{6j&~cCv>~m5EQ+d*YoV8rOp9VCX+wj{fn@q6;!*cU3w%+f$}7u&cq_4j0V 
ztG_Zb#SiN9+<1{9QcaKY>rZ2J_Eb4*Ma(60_9H^q{B)N(#d1=Z^io`vztI+pzK-uq zPRnUSqrSHNLqw-mN9oj>6Us_fHL~ihrJMv|C~TXGLL0CP4BV<4eR}xrd)gPYrOd z+quy5(3U&XpVc=5Pq?%Gwn;!Psyph(7pQ{NfX{W{Y@}z4YO2!1wl! zEt`vG{dEgKJ_f`3r?+p^*Eb8~p!o`bpMmr4K7a=RqP%~<>49zF149G27349{a&XSw z2v7mA7v#SN@P2?90MR;)!1pey6F36T%0zvE15ykR;1XcFt2$sP9FRuivu&^<$Q%vo zUz`gN#(rF;@rG1+^pECuD$mpg+EE=Z|!F9OPdsAB_W; z<%8Bo`GH}EdGLq&ST#t`>2N2=2g#r4fYw2EKy8EC6ZPwi`c6jc0U%a@00MYb9@_un zf7%q4`%5~_^&uV5Tsv;)KnhwBfZ%pa)c320CN{bj)Ue z7GRhJFwp$~cL1RiK)3@iyba(e;PVJT)PDDaI~)o4R0FI5h{^zfUj_nHUtn4U)u|TH zp?ad_s9rk(&H1(KzD6>P#@eVfTCGms%3v&AOrF3 zTmJAE+M-Qci|^gKX~&K&+ln`A*?#|~N4ESRw+7IC^S#_05MK!povQ$-n?Zou8Rd=Y zh++-E_x}g-(*fNz@_)bSPj@}?@WWdl*mhsp=DW&@*KGRJI*`1|e~$MY{~B=jhuUFI zA9UUWpw@$6P9JnEqF4j40MP%MJ^l*luF>Znu;R`tvSG{C$KanUSn|N8?Ta^W{@F5g zy)Ik!vqj%m1BTwT`Ib$OKCtQTty^w*r1;S-+aG@H(R;Sc>W{YbTK!AFedM+6Mb|d~ zKe9i%R-mW`r~vf;uDu6uIXwU0+Q0Ot_P>_@g@4fpT|)r;$o}ZsfT9}Uf&YO%KX3n1 zV5gt8e`)bA+rPB<7w!MgYQJj#YxV!e{bT#1>okBL*&oeA5ncP<2lW5W{@>mcgVliU znql>$@&AMMe;EJQ@~`pySfB6LXX+Wib8h^j^BYC9tN_senm&I4bl2!}b^QN=^?%v+ zzi9cnD!*#?YxT|nckFZHei_gY?GpghS`f^&6W#Bih|U=Sphw$@t_Ns(ZrwxU`lp_P zQCsY|cWcof9@_*~y~n{S_k(?}x}k>YfQhtpWLte{r^A*x4ol#ZP}a9t%R4L`?j5i z+KnGWryt)7MJV{H_q$$%{&MFIsN&&+&|Nou3Rz$M7<#N^7qq?Epfff<3%&8oz0mqC zUAhpt3)+>bhu*E*4Jif&Aok-AK&POs(C$5x;yFdZzhB=s{qEYv$EI_XNNE=qdZf z1rY3Me(ljeyN;lC|0DQ56I}-~z}!U7ApoclhzG#;kmx+6{AZrd8s&d`Py-CGEU^~W z%jd($9mVkVFBZTvd)2UM`x$uG6YJr>EUbXHUfBe%Gu{cOZ+f8mH=AnV-QF^I`Fpol zcTsME52>z)O)H;+|FH0W_(9_~_@11z@b>;yux?`sT=v_?VRz$0Fm(AoIQ^aVFi!Y! 
z^|Nm*fwA{w!4GQRsisW44ZpUn8V-hk3(srW1CyS~fybP;dbsVsHo|;N z2K?6cU2w8y3%r)M2A;y-0Pk6|1Ww*~GkoOF#_BZbeE7{jG2lHnT!f$F6~POXPr*;I z)>em~)WCh0m%;U?HL&`HyWjze4u1ZIZ7}z84hU8b*D zckyE^RK5<;J-2p7m$?Q9ZL`p!?J+hqL!8tdx!Ixn`pQM!7s+}ZDM^E#e57BuZaoot z?uIkEKR=Fv9{DsKI{x7yo%D=NckfZNZu;DmPH;<;u4A2BH?Ld`DL+r>a*uK#*$FXZ zs4j(`zHU&*Jx+n1d30F!^Xu?G_wqk;&icPP0?e+%XuDJ3e^{a8Io44 zFOWaG4*zGI%o;@vLdU{e;n)7O7QW^Azrh>-_B8yvC$_@xK+nK&+N<#G6>q=~i|&K( z`Q#7qZ?r4m&+l6WuaUh1zq#-Zn7#IS`1Er%@E`B^E4*pt=dkLPa`?T}!*IvML+}kl z_ruS%t%lz@xC72@`4m2!t%n!+-+=E|?t%5?N8v2>Z{f8~FT(HMde<+k19Rix>N_Gb87STVYd~e}{$#g~_{0uf`KWMfsoz9v3L!3P3=Z{kxifjf}ZFH6UdB4wM+>iPjT<6YCFmw`j@L|J8}|tpK\n") + header_file.write("#include \n") + header_file.write("#include \n") + header_file.write(f"const size_t {tensor_name}_len = {npy_data.size};\n") + + if npy_data.dtype == "int8": + header_file.write(f"int8_t {tensor_name}[] =") + elif npy_data.dtype == "int32": + header_file.write(f"int32_t {tensor_name}[] = ") + elif npy_data.dtype == "uint8": + header_file.write(f"uint8_t {tensor_name}[] = ") + elif npy_data.dtype == "float32": + header_file.write(f"float {tensor_name}[] = ") + else: + raise ValueError("Data type not expected.") + + header_file.write("{") + for i in np.ndindex(npy_data.shape): + header_file.write(f"{npy_data[i]}, ") + header_file.write("};\n\n") + + header_file_bytes = bytes(header_file.getvalue(), "utf-8") + raw_path = pathlib.Path(output_path) / f"{tensor_name}.h" + ti = tarfile.TarInfo(name=str(raw_path)) + ti.size = len(header_file_bytes) + ti.mode = 0o644 + ti.type = tarfile.REGTYPE + tar_file.addfile(ti, io.BytesIO(header_file_bytes)) + + + + +def _open_tflite_model(model_path: str): + # Import TFLite model + tflite_model_buf = open(model_path, "rb").read() + try: + import tflite + + tflite_model = tflite.Model.GetRootAsModel(tflite_model_buf, 0) + except AttributeError: + import tflite.Model + + tflite_model = tflite.Model.Model.GetRootAsModel(tflite_model_buf, 0) + + relay_mod, params = 
relay.frontend.from_tflite(tflite_model) + + return relay_mod, params + +def _get_test_data(testdata_dir): + + from PIL import Image + + image_files = ["digit-2.jpg"] + + for file in image_files: + img = Image.open(testdata_dir / file).resize((28, 28)) + img = np.asarray(img).astype("uint8") + sample = np.reshape(img, -1) + + output_shape = (1, 10) + + return sample, output_shape + + +def _apply_desired_layout_isa(relay_mod): + + desired_layouts = {'qnn.conv2d': ['NHWC', 'HWOI'], 'nn.conv2d': ['NHWC', 'HWOI']} + + seq = tvm.transform.Sequential([relay.transform.RemoveUnusedFunctions(), relay.transform.ConvertLayout(desired_layouts)]) + + with tvm.transform.PassContext(opt_level=3): + return seq(relay_mod) + +def _apply_desired_layout_no_isa(relay_mod): + + desired_layouts = {'qnn.conv2d': ['NHWC', 'HWIO'], 'nn.conv2d': ['NHWC', 'HWIO']} + + seq = tvm.transform.Sequential([relay.transform.RemoveUnusedFunctions(), relay.transform.ConvertLayout(desired_layouts)]) + + with tvm.transform.PassContext(opt_level=3): + return seq(relay_mod) + +def _generate_project(temp_dir, board, west_cmd, lowered, build_config, sample, output_shape): + + with tempfile.NamedTemporaryFile() as tar_temp_file: + with tarfile.open(tar_temp_file.name, "w:gz") as tf: + with tempfile.TemporaryDirectory() as tar_temp_dir: + model_files_path = os.path.join(tar_temp_dir, "include") + os.mkdir(model_files_path) + header_path = generate_c_interface_header( + lowered.libmod_name, ["input_1"], ["output"], model_files_path + ) + tf.add(header_path, arcname=os.path.relpath(header_path, tar_temp_dir)) + + _create_header_file("input_data", sample, "include", tf) + _create_header_file("output_data", np.zeros(shape=output_shape, dtype="float32"), "include", tf) + + project, _ = _build_project( + temp_dir, + board, + west_cmd, + lowered, + build_config, + extra_files_tar=tar_temp_file.name, + ) + + return project + + +def _run_model(temp_dir, board, west_cmd, lowered, build_config, sample, output_shape): + + 
project = _generate_project(temp_dir, board, west_cmd, lowered, build_config, sample, output_shape) + + project.flash() + + with project.transport() as transport: + timeout_read = 60 + # _get_message(transport, "#wakeup", timeout_sec=timeout_read) + transport.write(b"start\n", timeout_sec=5) + result_line = _get_message(transport, "#result", timeout_sec=timeout_read) + + result_line = result_line.strip("\n") + result_line = result_line.split(":") + result = int(result_line[1]) + time = int(result_line[2]) + logging.info(f"Result: {result}\ttime: {time} ms") + + return result, time + + +@tvm.testing.requires_micro +def test_armv7m_intrinsic(temp_dir, board, west_cmd, tvm_debug): + """Testing a ARM v7m SIMD extension.""" + + if board not in [ + "nrf5340dk", + "stm32f746xx_disco", + "stm32f746xx_nucleo", + "stm32l4r5zi_nucleo", + ]: + pytest.skip(msg="Platform does not support ARM v7m SIMD extenion.") + + model = conftest.ZEPHYR_BOARDS[board] + + build_config = {"debug": tvm_debug} + + this_dir = pathlib.Path(os.path.dirname(__file__)) + testdata_dir = this_dir.parent / "testdata" / "armv7m" + + relay_mod, params = _open_tflite_model(testdata_dir / "mnist_model_quant.tflite") + + sample, output_shape = _get_test_data(testdata_dir) + + relay_mod_isa = _apply_desired_layout_isa(relay_mod) + # kernel layout "HWIO" is not supported by arm_cpu SIMD extension (see tvm\python\relay\op\strategy\arm_cpu.py) + relay_mod_no_isa = _apply_desired_layout_no_isa(relay_mod) + + target = tvm.target.target.micro( + model, options=["-keys=arm_cpu,cpu", "-link-params=1", "--executor=aot", "--unpacked-api=1", "--interface-api=c"] + ) + + temp_dir_isa = temp_dir / "isa" + temp_dir_no_isa = temp_dir / "noisa" + + os.makedirs(temp_dir_isa, exist_ok=True) + os.makedirs(temp_dir_no_isa, exist_ok=True) + + with tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}): + lowered_isa = relay.build(relay_mod_isa, target, params=params) + lowered_no_isa = 
relay.build(relay_mod_no_isa, target, params=params) + result_isa, time_isa = _run_model(temp_dir_isa, board, west_cmd, lowered_isa, build_config, sample, output_shape) + result_no_isa, time_no_isa = _run_model(temp_dir_no_isa, board, west_cmd, lowered_no_isa, build_config, sample, output_shape) + + assert result_no_isa == result_isa + assert time_no_isa > time_isa + + +if __name__ == "__main__": + sys.exit(pytest.main([__file__] + sys.argv[1:])) From 6d7bdaa3dd3c5077d9efdc57c691987df6bc9cfa Mon Sep 17 00:00:00 2001 From: Sergey Smirnov Date: Thu, 16 Sep 2021 00:47:27 -0700 Subject: [PATCH 03/22] Issue 8717 Add schedule for depthwise_conv2d_nhwc --- python/tvm/relay/op/strategy/x86.py | 2 +- python/tvm/topi/x86/depthwise_conv2d.py | 30 +++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/python/tvm/relay/op/strategy/x86.py b/python/tvm/relay/op/strategy/x86.py index 1c8d1b478cb1..553e90d975eb 100644 --- a/python/tvm/relay/op/strategy/x86.py +++ b/python/tvm/relay/op/strategy/x86.py @@ -214,7 +214,7 @@ def conv2d_strategy_cpu(attrs, inputs, out_type, target): ) strategy.add_implementation( wrap_compute_conv2d(topi.nn.depthwise_conv2d_nhwc), - wrap_topi_schedule(topi.generic.schedule_depthwise_conv2d_nhwc), + wrap_topi_schedule(topi.x86.schedule_depthwise_conv2d_nhwc), name="depthwise_conv2d_nhwc.generic", ) else: diff --git a/python/tvm/topi/x86/depthwise_conv2d.py b/python/tvm/topi/x86/depthwise_conv2d.py index 5e49c2cb3b78..46f70ffd575b 100644 --- a/python/tvm/topi/x86/depthwise_conv2d.py +++ b/python/tvm/topi/x86/depthwise_conv2d.py @@ -305,6 +305,36 @@ def _schedule_depthwise_conv2d_NCHWc_impl(s, cfg, data_vec, kernel_vec, conv_out return s +def schedule_depthwise_conv2d_nhwc(outs): + """Create schedule for depthwise conv2d in NHWC layout. + + Parameters + ---------- + outs : list[te.tensor.Tensor] + The output tensors. + + Returns + ------- + s : tvm.te.schedule.Schedule + The computation schedule for depthwise conv2d. 
+ """ + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) + + def _callback(op): + """Traverse operators from computation graph""" + if "depthwise_conv2d_nhwc" in op.tag: + out = outs[0] + depthwise_conv2d_out = op.output(0) + data_pad = depthwise_conv2d_out.op.input_tensors[0] + s[data_pad].compute_inline() + s[depthwise_conv2d_out].compute_at(s[out], s[out].op.axis[3]) + s[out].fuse(*s[out].op.axis) + + traverse_inline(s, outs[0].op, _callback) + return s + + @depthwise_conv2d_infer_layout.register("cpu") def _depthwise_conv2d_infer_layout(workload, cfg): _, data, kernel, strides, padding, dilation, _, _, dtype = workload From 36aa10de2d526d07c4a9210bb27f205606f52b4b Mon Sep 17 00:00:00 2001 From: Sergey Smirnov Date: Fri, 17 Sep 2021 05:37:42 -0700 Subject: [PATCH 04/22] Implemented discussed changes. --- .../zephyr/base-box/base_box_test.sh | 2 +- tests/micro/zephyr/test_utils.py | 113 ++++++++++ tests/micro/zephyr/test_zephyr_aot.py | 107 +-------- tests/micro/zephyr/test_zephyr_armv7m.py | 205 +++++++----------- 4 files changed, 198 insertions(+), 229 deletions(-) create mode 100644 tests/micro/zephyr/test_utils.py diff --git a/apps/microtvm/reference-vm/zephyr/base-box/base_box_test.sh b/apps/microtvm/reference-vm/zephyr/base-box/base_box_test.sh index fd8bb7a2c24a..77980fdb49aa 100755 --- a/apps/microtvm/reference-vm/zephyr/base-box/base_box_test.sh +++ b/apps/microtvm/reference-vm/zephyr/base-box/base_box_test.sh @@ -38,4 +38,4 @@ else pytest tests/micro/zephyr/test_zephyr_aot.py --zephyr-board=${board} fi -pytest tests/micro/zephyr/test_zephyr_armv7m.py --zephyr-board=${board} \ No newline at end of file +pytest tests/micro/zephyr/test_zephyr_armv7m.py --zephyr-board=${board} diff --git a/tests/micro/zephyr/test_utils.py b/tests/micro/zephyr/test_utils.py new file mode 100644 index 000000000000..09acc61cad83 --- /dev/null +++ b/tests/micro/zephyr/test_utils.py @@ -0,0 +1,113 @@ +# Licensed to the 
Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import io +import logging +import pathlib +import tarfile + +import numpy as np + +import tvm.micro + +def build_project(temp_dir, zephyr_board, west_cmd, mod, build_config, extra_files_tar=None): + template_project_dir = ( + pathlib.Path(__file__).parent + / ".." + / ".." + / ".." + / "apps" + / "microtvm" + / "zephyr" + / "template_project" + ).resolve() + project_dir = temp_dir / "project" + project = tvm.micro.generate_project( + str(template_project_dir), + mod, + project_dir, + { + "extra_files_tar": extra_files_tar, + "project_type": "aot_demo", + "west_cmd": west_cmd, + "verbose": bool(build_config.get("debug")), + "zephyr_board": zephyr_board, + }, + ) + project.build() + return project, project_dir + + +def create_header_file(tensor_name, npy_data, output_path, tar_file): + """ + This method generates a header file containing the data contained in the numpy array provided. + It is used to capture the tensor data (for both inputs and expected outputs). 
+ """ + header_file = io.StringIO() + header_file.write("#include \n") + header_file.write("#include \n") + header_file.write("#include \n") + header_file.write(f"const size_t {tensor_name}_len = {npy_data.size};\n") + + if npy_data.dtype == "int8": + header_file.write(f"int8_t {tensor_name}[] =") + elif npy_data.dtype == "int32": + header_file.write(f"int32_t {tensor_name}[] = ") + elif npy_data.dtype == "uint8": + header_file.write(f"uint8_t {tensor_name}[] = ") + elif npy_data.dtype == "float32": + header_file.write(f"float {tensor_name}[] = ") + else: + raise ValueError("Data type not expected.") + + header_file.write("{") + for i in np.ndindex(npy_data.shape): + header_file.write(f"{npy_data[i]}, ") + header_file.write("};\n\n") + + header_file_bytes = bytes(header_file.getvalue(), "utf-8") + raw_path = pathlib.Path(output_path) / f"{tensor_name}.h" + ti = tarfile.TarInfo(name=str(raw_path)) + ti.size = len(header_file_bytes) + ti.mode = 0o644 + ti.type = tarfile.REGTYPE + tar_file.addfile(ti, io.BytesIO(header_file_bytes)) + + +def _read_line(fd, timeout_sec: int): + data = "" + new_line = False + while True: + if new_line: + break + new_data = fd.read(1, timeout_sec=timeout_sec) + logging.debug(f"read data: {new_data}") + for item in new_data: + new_c = chr(item) + data = data + new_c + if new_c == "\n": + new_line = True + break + return data + + +def get_message(fd, expr: str, timeout_sec: int): + while True: + data = _read_line(fd, timeout_sec) + logging.debug(f"new line: {data}") + if expr in data: + return data diff --git a/tests/micro/zephyr/test_zephyr_aot.py b/tests/micro/zephyr/test_zephyr_aot.py index 1499d1ef27eb..6d2e7e5f12b9 100644 --- a/tests/micro/zephyr/test_zephyr_aot.py +++ b/tests/micro/zephyr/test_zephyr_aot.py @@ -39,99 +39,10 @@ from tvm.micro.interface_api import generate_c_interface_header import conftest +from test_utils import * _LOG = logging.getLogger(__name__) - -def _build_project(temp_dir, zephyr_board, west_cmd, mod, 
build_config, extra_files_tar=None): - template_project_dir = ( - pathlib.Path(__file__).parent - / ".." - / ".." - / ".." - / "apps" - / "microtvm" - / "zephyr" - / "template_project" - ).resolve() - project_dir = temp_dir / "project" - project = tvm.micro.generate_project( - str(template_project_dir), - mod, - project_dir, - { - "extra_files_tar": extra_files_tar, - "project_type": "aot_demo", - "west_cmd": west_cmd, - "verbose": bool(build_config.get("debug")), - "zephyr_board": zephyr_board, - }, - ) - project.build() - return project, project_dir - - -def _create_header_file(tensor_name, npy_data, output_path, tar_file): - """ - This method generates a header file containing the data contained in the numpy array provided. - It is used to capture the tensor data (for both inputs and expected outputs). - """ - header_file = io.StringIO() - header_file.write("#include \n") - header_file.write("#include \n") - header_file.write("#include \n") - header_file.write(f"const size_t {tensor_name}_len = {npy_data.size};\n") - - if npy_data.dtype == "int8": - header_file.write(f"int8_t {tensor_name}[] =") - elif npy_data.dtype == "int32": - header_file.write(f"int32_t {tensor_name}[] = ") - elif npy_data.dtype == "uint8": - header_file.write(f"uint8_t {tensor_name}[] = ") - elif npy_data.dtype == "float32": - header_file.write(f"float {tensor_name}[] = ") - else: - raise ValueError("Data type not expected.") - - header_file.write("{") - for i in np.ndindex(npy_data.shape): - header_file.write(f"{npy_data[i]}, ") - header_file.write("};\n\n") - - header_file_bytes = bytes(header_file.getvalue(), "utf-8") - raw_path = pathlib.Path(output_path) / f"{tensor_name}.h" - ti = tarfile.TarInfo(name=str(raw_path)) - ti.size = len(header_file_bytes) - ti.mode = 0o644 - ti.type = tarfile.REGTYPE - tar_file.addfile(ti, io.BytesIO(header_file_bytes)) - - -def _read_line(fd, timeout_sec: int): - data = "" - new_line = False - while True: - if new_line: - break - new_data = fd.read(1, 
timeout_sec=timeout_sec) - logging.debug(f"read data: {new_data}") - for item in new_data: - new_c = chr(item) - data = data + new_c - if new_c == "\n": - new_line = True - break - return data - - -def _get_message(fd, expr: str, timeout_sec: int): - while True: - data = _read_line(fd, timeout_sec) - logging.debug(f"new line: {data}") - if expr in data: - return data - - @tvm.testing.requires_micro def test_tflite(temp_dir, board, west_cmd, tvm_debug): """Testing a TFLite model.""" @@ -192,12 +103,12 @@ def test_tflite(temp_dir, board, west_cmd, tvm_debug): ) tf.add(header_path, arcname=os.path.relpath(header_path, tar_temp_dir)) - _create_header_file("input_data", sample, "include", tf) - _create_header_file( + create_header_file("input_data", sample, "include", tf) + create_header_file( "output_data", np.zeros(shape=output_shape, dtype="float32"), "include", tf ) - project, _ = _build_project( + project, _ = build_project( temp_dir, board, west_cmd, @@ -209,9 +120,9 @@ def test_tflite(temp_dir, board, west_cmd, tvm_debug): project.flash() with project.transport() as transport: timeout_read = 60 - _get_message(transport, "#wakeup", timeout_sec=timeout_read) + get_message(transport, "#wakeup", timeout_sec=timeout_read) transport.write(b"start\n", timeout_sec=5) - result_line = _get_message(transport, "#result", timeout_sec=timeout_read) + result_line = get_message(transport, "#result", timeout_sec=timeout_read) result_line = result_line.strip("\n") result_line = result_line.split(":") @@ -253,10 +164,10 @@ def test_qemu_make_fail(temp_dir, board, west_cmd, tvm_debug): lowered.libmod_name, ["input_1"], ["output"], model_files_path ) tf.add(header_path, arcname=os.path.relpath(header_path, tar_temp_dir)) - _create_header_file("input_data", np.zeros(shape=shape, dtype=dtype), "include", tf) - _create_header_file("output_data", np.zeros(shape=shape, dtype=dtype), "include", tf) + create_header_file("input_data", np.zeros(shape=shape, dtype=dtype), "include", tf) + 
create_header_file("output_data", np.zeros(shape=shape, dtype=dtype), "include", tf) - project, project_dir = _build_project( + project, project_dir = build_project( temp_dir, board, west_cmd, diff --git a/tests/micro/zephyr/test_zephyr_armv7m.py b/tests/micro/zephyr/test_zephyr_armv7m.py index 9128a793f184..4fe00f785207 100644 --- a/tests/micro/zephyr/test_zephyr_armv7m.py +++ b/tests/micro/zephyr/test_zephyr_armv7m.py @@ -15,10 +15,10 @@ # specific language governing permissions and limitations # under the License. -import io import logging import os import pathlib +import typing import sys import logging import tarfile @@ -33,120 +33,23 @@ import tvm.testing import tvm.relay as relay +from tvm.contrib.download import download_testdata from tvm.micro.interface_api import generate_c_interface_header import conftest +from test_utils import * _LOG = logging.getLogger(__name__) logging.basicConfig(level=logging.INFO) -PLATFORMS = conftest.PLATFORMS - -TEMPLATE_PROJECT_DIR = ( - pathlib.Path(__file__).parent - / ".." - / ".." - / ".." - / "apps" - / "microtvm" - / "zephyr" - / "template_project" -).resolve() - - -def _read_line(fd, timeout_sec: int): - data = "" - new_line = False - while True: - if new_line: - break - new_data = fd.read(1, timeout_sec=timeout_sec) - logging.debug(f"read data: {new_data}") - for item in new_data: - new_c = chr(item) - data = data + new_c - if new_c == "\n": - new_line = True - break - return data - - -def _get_message(fd, expr: str, timeout_sec: int): - while True: - data = _read_line(fd, timeout_sec) - logging.debug(f"new line: {data}") - if expr in data: - return data - -def _build_project(temp_dir, zephyr_board, west_cmd, mod, build_config, extra_files_tar=None): - template_project_dir = ( - pathlib.Path(__file__).parent - / ".." - / ".." - / ".." 
- / "apps" - / "microtvm" - / "zephyr" - / "template_project" - ).resolve() - project_dir = temp_dir / "project" - project = tvm.micro.generate_project( - str(template_project_dir), - mod, - project_dir, - { - "extra_files_tar": extra_files_tar, - "project_type": "aot_demo", - "west_cmd": west_cmd, - "verbose": bool(build_config.get("debug")), - "zephyr_board": zephyr_board, - }, - ) - project.build() - return project, project_dir - - -def _create_header_file(tensor_name, npy_data, output_path, tar_file): - """ - This method generates a header file containing the data contained in the numpy array provided. - It is used to capture the tensor data (for both inputs and expected outputs). - """ - header_file = io.StringIO() - header_file.write("#include \n") - header_file.write("#include \n") - header_file.write("#include \n") - header_file.write(f"const size_t {tensor_name}_len = {npy_data.size};\n") - - if npy_data.dtype == "int8": - header_file.write(f"int8_t {tensor_name}[] =") - elif npy_data.dtype == "int32": - header_file.write(f"int32_t {tensor_name}[] = ") - elif npy_data.dtype == "uint8": - header_file.write(f"uint8_t {tensor_name}[] = ") - elif npy_data.dtype == "float32": - header_file.write(f"float {tensor_name}[] = ") - else: - raise ValueError("Data type not expected.") - - header_file.write("{") - for i in np.ndindex(npy_data.shape): - header_file.write(f"{npy_data[i]}, ") - header_file.write("};\n\n") - - header_file_bytes = bytes(header_file.getvalue(), "utf-8") - raw_path = pathlib.Path(output_path) / f"{tensor_name}.h" - ti = tarfile.TarInfo(name=str(raw_path)) - ti.size = len(header_file_bytes) - ti.mode = 0o644 - ti.type = tarfile.REGTYPE - tar_file.addfile(ti, io.BytesIO(header_file_bytes)) - - - - -def _open_tflite_model(model_path: str): +def _open_tflite_model(): # Import TFLite model + + model_url = "https://github.com/tlc-pack/web-data/raw/main/testdata/microTVM/model/mnist_model_quant.tflite" + model_path = download_testdata(model_url, 
"mnist_model_quant.tflite", module="model") + tflite_model_buf = open(model_path, "rb").read() + try: import tflite @@ -176,7 +79,7 @@ def _get_test_data(testdata_dir): return sample, output_shape -def _apply_desired_layout_isa(relay_mod): +def _apply_desired_layout_simd(relay_mod): desired_layouts = {'qnn.conv2d': ['NHWC', 'HWOI'], 'nn.conv2d': ['NHWC', 'HWOI']} @@ -185,7 +88,7 @@ def _apply_desired_layout_isa(relay_mod): with tvm.transform.PassContext(opt_level=3): return seq(relay_mod) -def _apply_desired_layout_no_isa(relay_mod): +def _apply_desired_layout_no_simd(relay_mod): desired_layouts = {'qnn.conv2d': ['NHWC', 'HWIO'], 'nn.conv2d': ['NHWC', 'HWIO']} @@ -194,6 +97,47 @@ def _apply_desired_layout_no_isa(relay_mod): with tvm.transform.PassContext(opt_level=3): return seq(relay_mod) + +def _loadCMSIS(temp_dir): + import os + from urllib.request import urlopen, urlretrieve + from urllib.error import HTTPError + import json + import requests + REPO_PATH = "ARM-software/CMSIS_5" + BRANCH = "master" + API_PATH_URL = f"https://api.github.com/repos/{REPO_PATH}/git/trees" + RAW_PATH_URL = f"https://raw.githubusercontent.com/{REPO_PATH}/{BRANCH}" + + url = "https://api.github.com/repos/ARM-software/CMSIS_5/git/trees/master?recursive=1" + r = requests.get(url) + res = r.json() + + include_trees = {} + + for file in res["tree"]: + if (file["path"] in {"CMSIS/DSP/Include", "CMSIS/DSP/Include/dsp", "CMSIS/NN/Include"}): + include_trees.update({file["path"]: file["sha"]}) + + for path, sha in include_trees.items(): + url = f"{API_PATH_URL}/{sha}" + content = json.load(urlopen(url)) + temp_path = f"{temp_dir}" + if path == "CMSIS/DSP/Include/dsp": + temp_path = f"{temp_dir}/dsp" + if not os.path.isdir(temp_path): + os.makedirs(temp_path) + for item in content['tree']: + if item["type"] == "blob": + file_name = item["path"] + file_url = f"{RAW_PATH_URL}/{path}/{file_name}" + print(file_name, " ", file_url) + try: + urlretrieve(file_url, f"{temp_path}/{file_name}") + except 
HTTPError as e: + print(f"Failed to download {file_url}: {e}") + + def _generate_project(temp_dir, board, west_cmd, lowered, build_config, sample, output_shape): with tempfile.NamedTemporaryFile() as tar_temp_file: @@ -201,15 +145,17 @@ def _generate_project(temp_dir, board, west_cmd, lowered, build_config, sample, with tempfile.TemporaryDirectory() as tar_temp_dir: model_files_path = os.path.join(tar_temp_dir, "include") os.mkdir(model_files_path) + _loadCMSIS(model_files_path) + tf.add(model_files_path, arcname=os.path.relpath(model_files_path, tar_temp_dir)) header_path = generate_c_interface_header( lowered.libmod_name, ["input_1"], ["output"], model_files_path ) tf.add(header_path, arcname=os.path.relpath(header_path, tar_temp_dir)) - _create_header_file("input_data", sample, "include", tf) - _create_header_file("output_data", np.zeros(shape=output_shape, dtype="float32"), "include", tf) + create_header_file("input_data", sample, "include", tf) + create_header_file("output_data", np.zeros(shape=output_shape, dtype="float32"), "include", tf) - project, _ = _build_project( + project, _ = build_project( temp_dir, board, west_cmd, @@ -229,9 +175,8 @@ def _run_model(temp_dir, board, west_cmd, lowered, build_config, sample, output_ with project.transport() as transport: timeout_read = 60 - # _get_message(transport, "#wakeup", timeout_sec=timeout_read) transport.write(b"start\n", timeout_sec=5) - result_line = _get_message(transport, "#result", timeout_sec=timeout_read) + result_line = get_message(transport, "#result", timeout_sec=timeout_read) result_line = result_line.strip("\n") result_line = result_line.split(":") @@ -247,10 +192,10 @@ def test_armv7m_intrinsic(temp_dir, board, west_cmd, tvm_debug): """Testing a ARM v7m SIMD extension.""" if board not in [ - "nrf5340dk", + "mps2_an521", "stm32f746xx_disco", - "stm32f746xx_nucleo", - "stm32l4r5zi_nucleo", + "nucleo_f746zg", + "nucleo_l4r5zi", ]: pytest.skip(msg="Platform does not support ARM v7m SIMD extenion.") 
@@ -259,34 +204,34 @@ def test_armv7m_intrinsic(temp_dir, board, west_cmd, tvm_debug): build_config = {"debug": tvm_debug} this_dir = pathlib.Path(os.path.dirname(__file__)) - testdata_dir = this_dir.parent / "testdata" / "armv7m" + testdata_dir = this_dir.parent / "testdata" / "mnist" - relay_mod, params = _open_tflite_model(testdata_dir / "mnist_model_quant.tflite") + relay_mod, params = _open_tflite_model() sample, output_shape = _get_test_data(testdata_dir) - relay_mod_isa = _apply_desired_layout_isa(relay_mod) + relay_mod_simd = _apply_desired_layout_simd(relay_mod) # kernel layout "HWIO" is not supported by arm_cpu SIMD extension (see tvm\python\relay\op\strategy\arm_cpu.py) - relay_mod_no_isa = _apply_desired_layout_no_isa(relay_mod) + relay_mod_no_simd = _apply_desired_layout_no_simd(relay_mod) target = tvm.target.target.micro( model, options=["-keys=arm_cpu,cpu", "-link-params=1", "--executor=aot", "--unpacked-api=1", "--interface-api=c"] ) - temp_dir_isa = temp_dir / "isa" - temp_dir_no_isa = temp_dir / "noisa" + temp_dir_simd = temp_dir / "simd" + temp_dir_no_simd = temp_dir / "nosimd" - os.makedirs(temp_dir_isa, exist_ok=True) - os.makedirs(temp_dir_no_isa, exist_ok=True) + os.makedirs(temp_dir_simd, exist_ok=True) + os.makedirs(temp_dir_no_simd, exist_ok=True) with tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}): - lowered_isa = relay.build(relay_mod_isa, target, params=params) - lowered_no_isa = relay.build(relay_mod_no_isa, target, params=params) - result_isa, time_isa = _run_model(temp_dir_isa, board, west_cmd, lowered_isa, build_config, sample, output_shape) - result_no_isa, time_no_isa = _run_model(temp_dir_no_isa, board, west_cmd, lowered_no_isa, build_config, sample, output_shape) + lowered_simd = relay.build(relay_mod_simd, target, params=params) + lowered_no_simd = relay.build(relay_mod_no_simd, target, params=params) + result_simd, time_simd = _run_model(temp_dir_simd, board, west_cmd, lowered_simd, 
build_config, sample, output_shape) + result_no_simd, time_no_simd = _run_model(temp_dir_no_simd, board, west_cmd, lowered_no_simd, build_config, sample, output_shape) - assert result_no_isa == result_isa - assert time_no_isa > time_isa + assert result_no_simd == result_simd + assert time_no_simd > time_simd if __name__ == "__main__": From 605cb1bf1fde2818181921ad9d3d3edc72cbb366 Mon Sep 17 00:00:00 2001 From: Sergey Smirnov Date: Fri, 17 Sep 2021 09:34:46 -0700 Subject: [PATCH 05/22] Removed unnecessary test files. --- tests/micro/testdata/armv7m/digit-2.jpg | Bin 572 -> 0 bytes .../testdata/armv7m/mnist_model_quant.tflite | Bin 29888 -> 0 bytes 2 files changed, 0 insertions(+), 0 deletions(-) delete mode 100644 tests/micro/testdata/armv7m/digit-2.jpg delete mode 100644 tests/micro/testdata/armv7m/mnist_model_quant.tflite diff --git a/tests/micro/testdata/armv7m/digit-2.jpg b/tests/micro/testdata/armv7m/digit-2.jpg deleted file mode 100644 index b709a206b8d776215dcaa78643b22fe628b3c43a..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 572 zcmV-C0>l0P*#F=F5K2Z#MgRc;000310RRC1+Wgv=4-_35A08bV92_7dE+-%&EF&BoC^soAFflYVG#@89JvcHvE;BST|G)qX2ml-a z9036l0RO}Q9{>OW1pxs80RaI300000000010s{mE1_uZU3Jd?l0JRVR0s#X90t5pE z1q1{D00Dgg0s{a95d{(Xb($mz{*4NnC+Tr5k)}+G+9pM!N9a z+Fgtq;Ufks4k5G6O=vOe_>A$lT&9{hIqq>&me#H&a?UfhKQ?v%>I1(TC zPo;Q8dSAiM82D?!`n~+#AhvSzq6>Sai_G#i?k&erpO=siPeWdhbMfE9x=i;{+Ft7# z+e`yT6p%#>F!kUap}qMH^{zG#jmxgXeWv)|M1xS$?n4;X*+RSr2LNRA&OU5{I`h)J Keu^lfzyI08i0m@} diff --git a/tests/micro/testdata/armv7m/mnist_model_quant.tflite b/tests/micro/testdata/armv7m/mnist_model_quant.tflite deleted file mode 100644 index 371235edccebfbc9ce7c715a980ddf7a6c39ce9c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 29888 zcmXWC2b3GvdFVeF49d)4fI;uQ>;l-{mu1;Ri4-YO%{|Xfv6DDGd3o>T<)3$yJ#mXH z%ZV*pwyZ9ZqDYFpbIIN17B08WE8+M)s;RbJ! 
zB>4{wx!aImH2A}Y{%S-1u0f{3TmNnIuOI-dbCXcB_7!|wxi{~xZ@nr$2n66SLtc-d z)iP%J-o48g74Iy*n0oge>C}lUtd6D|TO#kH5}dZyP!85{Is~UqQS}cj*EJ}v>ya_!Uph=u`Yb`n)ds1N57GJdHhq1 zee&44xs$!U&zi44GQ48j420`#yD#r8Sd}WyYnvN)Z#~jZ!ZK&PUdz=nOPFQRtkT9Qnf=`_=bCXv;Zd zN{wLysoB9+!}w$8mo(AAH6i}2xY;uWC-enpg~dC0v>5{DaGBa8SRp zK5sNk%RAd)+rFq@e zTkJuu#2sHcZic%(7<1trg*tTMHSo!;D{WZrIS;cqZ=&*A&#aE$G#}^W4{ba%2*sJ) z{MR(s?|B{;7LWY;>CB$+xy0PWs-*YTX-V$lboY~rX*(%m=$O4TOXpcAg@fzd2-O&HF!Q}??jk<3(=A}3K+-#J;*_iKQ zLx0rJUu%44Q={$G#yrnA%D+EQJH0I0D3$@+Du0o~ zByPZoPDB%-tkMMxZP5rGy*?=S`N2avzg$}HN$h~#p_;fJr$dk~uvJIN{>R0rV2Xc` zELMY51tp~`LScp(5K)SZsx3hA2BpPm!V60HQI%(LeGBtzWP4Q0UVkXY>#b>M-Os6z z0~9uSkmAmXc*>f1Ssq)HCygckobW^hfzuHwu8#4txnSVw8PQe~r!30-5Wbim|E6xK zWn1%c^Y#Cm`fhCK(A_WW^R#~abv0)jMRk!pUhEfCIOVFj<*A0k3)Wgn_M0I!?u5%6 zGR8{Ac`2z9OC}?Pj{ve9M-fX__cr4Nj`zG@91oN=nvZmDt+%!C+7mHXS`D5aO|u3v zH-}iY`v$S7kXKgFn8?pBVWlW25b#1ZKpcroOw3hT^l}c%hr9w01j@_0*1CGgG}?IxY(0}y79k4)v@DKnt$fB$eN}*f3;JsVG5o-f!UG>j}(m| z<3ko!fmV3nW5lu~jRHNUT0?AQrk5>_cChu+MwK2km|Nk;x3m;mtowC6Dfrr{PE&K~ z2?Zsa&b=sR_F3Ft+vvQLKK1M*i?fy5S1U4lr~+oaqu)?0f1A^MxKxv&%nUuQ%-qRM z!Qs|WoY~&APQz&XKk9hwd(gIA2b9gZht-fcUYA!{%$9cSNM8Mhi%S(B?jR3Mn9fOt zE0;QtynH6#{425$V`SMup>m{ndVL#K&z(L8-zi^w_$n{BnSD}xZ`}gTvs@4oFEPc; z3v6w`mI`0RSF_UYZmM=TxWLHfR%{Pu9ufnuEJYaT#Zl>6Y1_UK-*)DFFN46heD`lI z&n`^3WV5NzDmNcr>3b|IZ9a4w<=4fG+*?aCpOZH3W82l#{^{x%aaOg?Oy}=CECmiC z`Rz*xv=??2d9?#4++X#|UTqc7`=4t+#PjzC;NApWx5gDq2ci}oN2xFhF92lKsB?h{ zAQiQ`D!GkrFK8F%L`EF%Gl!2dM!eW*{tNPUDh-_H&qF&!Y1hFousV?D+i_taA~lsz z{gMFiGUe4Pu5`XSx9!T^i>D=8=_oBFWt)sWlw6GSBsI>DQL`{{E7dlS#AWo{tGR;9 z8~3%ute}>BCC?r(I-Nnl$fOg{nK^pJ=}}KB*BOJn-WQ)jINj5Aretm9c2r_75UPw|{jsNSSuXO2bk@Ozr5y7SHbtI98ST{#DS$^#gZKE^62^I6z=izJ2)kmIl zftN3Aw8H$`p5t-hbG){HVoNDPIipU`!8uw!E5@^AgdIio02KD8U=$;g94e1m6443; zo+yza>2#Q&vM6Cnpgf8!@Ypdlp2af7Dz%=&tI_}+*5hWT7=D}&#sli%TGgF`F(!jk zFZq~Zo)^TY+&EOur4d>d^+Bv8LTB)53=zc7@>y7&Dr8_ps?25-NjSl#)$=iW6~S?Y z$A%ySAlT@o(V(Oqkk+O31Waj)YD6%z#-(U@RQ{TmOW`JD%vCCp;U$n2MyU{T`&T(s 
zYNmqJ0aC|+X?Xyr7is}Yg@(BI%2>r+qSA0A4SCDeZL*SKQS)Sxzeg{olWCoxtg>Yb znGBP2r1kg-cDrEUTN-&AYWk-VO6s}%fmDa;c*U5~Q9;skq_tUW>oOEvt(8Pu$#%1{ zBRde0=z$T18xSVE0;noscVy~;lCr?{(=XqrhsE*a0nqpFP;9s6&=M70xzzX(1_6Ct zr|99}db3dABz-EIa+8zbWEOnfYC*wZ(sxy~VuZiu;S$xz`mQ{2ajl%wai>MWovPg) z;w5YG_>9;WUj@VNA-pQ|izy>%e%_*HX*oj{fO6M5&XZ8QraOw%Q|ClO2J1z=5)CM1dPe?jft#g&~H++%~ z?y3Bu+VjmttBF^G={p!xbTm}Ks34B>d!#%@kXKW(<7+Io9U*oGPz!?(sY6L1rc5%? zZV_HD$A~N@qpqtNK*E<*CUQoeJi~_h9%zZp5PDM_I=mI8(U`PO2&~e=0L&tx6U?H_ zJCLDZ;L>E3Zq=sAD6$tOsT!!PD~Rh44W%3uldY3O4@-3E-h5C!1*TZ|{1hwvYmB1A^pO@=jKcIK&zI-6y+6IbQDoaqF*R>{y4x@ABnp|mheqzu!LxT`@Bd{r9C^cQN5Fa(Iv%+n! zD%-O?=iy*5!H(vG6?OcoLF(G+7&X-RSeNRDG%&ejUDNPtjid8%p_J5atxLHJF(zKC z^b2{#;YyuZq9={Wf{0o5C)h5zH)0hA8)t_}daU_q>1@4#cOKc%C~F}ZN{Qz?sSXyI z7Bd(NwVXsPDYN*3N;D0yo6))+%D^0KJt2+9Awrr1A)p$nQnSY520I*+h$*P6SsYHU zc0e#!!>ks#57|^O%O9Z!v9}cD3vA+(3X)y2ZYmSf-uXb`h2nic z?GZ>n;ZGbO)@;C`*?rgUr(@f{*siSKy7V4(T{v?dmiAwJuJ)PZY-jh74Mi#U$Bv%= z-P?k;(Bsc`CeHr5`=i@T;Ya6YF8{~aXY7^f4sGuwb-vp4h5UZ(xpkxaSC@Xg^xgyi zep5mSEQY>#zo7rh+%Hz)iO3zrCpqz(HS%$gxN)FX2F~r`-+SVoxcl5@j*=jh#|>A@%1c6rOcPbSKEz0hiO7w%B>8ciy$5Gc{a zf2yXn(TiQp+FJx^VPLU}dg};%?HdP&L|!#FpB+*+acZ{v{mzcW1T5`}^)0Kzo~oZG z5JkxKX6lMqoHltLyhB}O;R#Vw^-=zO@WsV+wCM;n^#X&eB-Dlc{`?&$rWAHg;?KOu zs9TC1`AvbpNwE1x88u^ zMTOKMrCSGBFoMuIJRjey7uV!pozy>i{t99Ke&xF%Q+Xu})bvdim38I)%@(+=KJHv10^jbg(#cIHzV{nwJP6yqbm)9yb3J)dWM zSFmOKRlCX6R9R>bb{jvMbH}#B4~oZjZuBX4Pd_46Nz0L+LbG<<;#k{O~3uc)6U}68+PfCpL;2}>wjNWz7aVEYx{i4dq>vJ zt~|qS(I0&uJ9lM-mU*`up~P;RZ$M_fua;f&;||5I#lLXBoqE`G?6cUT4-fvjP`-Fn^^@t(mC(wT z-awa_15aCJ{C6&QeUM|YH(Z}nC|8Y(;twV?fl?~9+}>m5*M|HLO5hND1sB${q;CZ*}N9W%YFbobJ=8+b9x2J%d9 zPE@Q!wLB^$NX8*Yof47NDh@t1j6B3qlFO#=30we8sgC>UjB*D`N#}_z1SwVOxinSE zO!1)_qoVcVxFDY01qzvJVVjZXRN;De^Z{imdkmn34;QRD7QLxt$|Eg2EfWESO-oGv z+zPF@CzsVbNUfmO3w6Z0SsGoN6c88a5)q4ArRi`#omt~k2_<)+s=V)1IiB~vN~&ON z2!DQa^eg+K%il{@aRCj|6Z`3LI@*k-H6;sC4)XGl*DeCQy^~Fne^;tVdN-NOPpgDNchoBUt%_kf4zx=G*3F*A@q$_;y~KC)XW3Z-ot0 zC+c1Gl#dcUL+2!d%!PqiQ9c&4ZBn-sPIq-F2S*ed#fGe&c2IGy&0nJAqhZ)Z$@>V1 
z`O?^nVQutIsjV*v7dy6kifSUx=?j4A{zTFe(sQeOY0iRomt=IS`wyHVMP3MSW<+E3px1G03Isk`Se0EEICR4;gTwP`_I+`ob(M z{n|bm7XI)w$4DGB3G43}dU+0|sDJ5pl&xAe^xlx3JV0w^g}UqdVS`uRw1}hEd(*fs z$d>36=_Y)<0~U&Uwf84N)Wy|pwn{8dKV|Lsc6`=KbBEnsi?mbDtMu2`tiTA~6 z%)7$c!H5JqP!cEV=2`*T9pIbjy!8pfJ68RVN?>B-!F9iFd2a(-&lO=_z4$qG52)W+ zc9M=UC}}r&- z;dlXbbAK>UtvaNIaJ95Ov6uIVTmt-ojT2^U1fNof&KNGrh1aTLMNbx1yHRsc>N~Pm z$jbo=Q6?k5 zAzIvC%?s)AXfeH!3IU~hPFk2tQR@>l7oS7;5*}~7=#(OBcyyN24I zXTLa5iSows3)!-GzskN~-KBMCm?P) z3q!j*O99gw9vxrPYYe&VphBowuIGK;*~ogXi7CXUr^wmHxt%oZ*2>}KJq4x2OXo_r zx8b~yo05y_n;(@qO-V43-44|J9&sCNGB~Q84}5r~hLzYQ?BQs3N?B@G=3OSTYqqe( zE-LeJ@?znQ zkCyNGZnM>XK2>u^FnlkyJ@duZbvDtP2Z1`*sY!&>T9Z1dweJLtDLFEaKs9G(YlN~%(Dfl!vU`1tGem2nJmb0m+V6hzh^t$A zZVg||RU+QUTvw9DEIVn6#U-L@+VP)_RhHr|HWP^Pk5abska8%$L+}%GQM_7DGn*h5 z(m5SNLqUqZmL@Y)O26L2GFIhvdr)j%5lqqr&CJ;M-QCxDzB}?oJol@630t_%z1jY7 zq&IhjWTHs-)qR z;)!BCtnKhxQ+rM#Z%?&K87z~4WbeiQS~(NL3v@%w!JeYq29Tel(VEg+ZupF0Q!=30G2}Etg5& z>Vg8I!JqA*$n5}4O>R|O{2Wp6tx{aFi%8UB*I#94r}=C`y2Q*~D@jta0FTWnjw)1b z(BX83`?6Av$Ss#*-n=Ell4bT3jG>Xk>ljp~^d+zg)FkUibhvacoggUN0IlRYHMDQ& zG!6L0lvOp$lyKF%!&-EjaUgE}B5uCF2fYmi>^I}?CC4=~w(8&8(AoIW&Cq*V_ic}` zNP1U(>snsAWPiF*trx1Gyx52FS@rT9KLcR3ar_e*6d`9_N$Bf8=P zi~U?j5UrPMzdTlBI0*qvjk9M|MheA_JHy)sHOPLNMv+3CmnF7VWKg^`TRm)V)uAp9 zk=9vIa298pMU=9GTvhI8KJjcO`Q8Xl@ii>cI@fQGA^%JOmv?}j1@4u(pZbwib%qE{6@(>B$et{f$4 z43a}+OsK5dp2Fo`7B2Q$0A=(@?gAxjP)${%$=#%lIaDr0Yc%guumD88>@w(9XBGrp zNn)FwV$nUpOshmx=7)9-jhp$J;H_v~IkRvwdE&%?@XG;l3$Vr7DPo z*~Wuhev-r(5j@;Zw1^l%3g3Bb7Ca`0EPA2T)3yS484IBRWyPE|s%tP-6lEDcM$qVP z*7ZbDzv*p;yh^Edz;Pjk&d87s0WagMl_R=`H8BZGavh#L?+~Gdj1l=Nrq85re9*Mq z@~#0iEowP!{USNZS+F0G;}T+hKT_cX&9~yMN=2=Z#h;Dq32|1Lta5NkK-*81T2XbU*3?X8HJ$)S>TU|QRi7Jry;Y^5 zH?Nw4*3Q|%@WH&Z2`Y4RM9>K*rNjs)NA{N$d!hi1l@`%MC{3{db)cnsaDI-t$4N>R z#HAeK%pm*sRGTUX4--n&wwJYY;)mNM!I6OQ?$qW-UTa8eTexd#Hjo2w*N!MRZJ>jd zduQEes}{#o?EJd@V)mgX*EO5DBK>VUeKs2_U%u^p(xl>`AIHS3jn5vkrYrLk&37_b zWaENB>AEtk63<)ab9i(L)u5&av%3_u%$nvuaZ@BC9^$jnsr=PB58r*O>xPUmF|$|r 
zoV)|sZZv(E`49xPztL?PoL9ngPS7sJ=4ZF&BHBgUxqPdJ=U>~Nd$3a5yfUw?oC6(_ z2f#yc>QFpAxwAB%ymv%ENard%beSJl~!TzOi$MlCaC%tG;a@v{rm{-xj|V9!tnB=e$hNn3EE9L2|Vu zU(9>4O?qJ+5700Bvk|pEeDSG&0oau{6)GE0wX!LnN{R~9Fc`<+J*NN-Acr0F>^8kQU*Jm~_9MHL6XNa7}338LfsHxK<>p zdp8hY+Ov-?K}s9aO0lidxTUH@-pQ$4lpg%M>HP5CRK|9qO~d zIIl+UIxJQZW#nCbAT*Fyhd>V3J8m)O~@3(^D`mfio}Vgb^wYkIekl%$uiLl z67XPF0NOsg8)lvwHm-96f6}_^x?As=@P!Ybi-t=Bsz?Q7N>jT=ZQp?|?Q`l*0D~)H zD__zVt)!V7mQsi*s-@%yrFut-uRP$05}a)Lm9g=${Yj-*=GW=jY!geD=W+c$n(@x$ zeB@SrbJ`4S>RvmyN-Z$!d4|`6^oTERv#vi>P z?3h%9KRj0Z#Cm+>IJbN~u-Lop^ljY!oj1D$j|6M`SG#-PyRu5T_|cP|-_Byl?|AIOR$zE6 zb+@TF+Vr~%?K_FS)!I$h#~)o(^!{%Avrm2{*J-IJOy!#^Qm_eQnb`swpIL7_6^UxK zBmkMiEDiq5UM~$9Lu~?RR8mT5NMP82OVhFHk5Z&Mj7@cNAbDw$GBa5={9OkBC zhvmw6jd4nO;F4N-UKBbkwb#om?mDf>SM%b$RYIIDR0AwNMYVqI=@pilQNo!yMk5vv zS8uREsc z*kh-2t21$q+8Sk4KHRo)iuH|4YBOr%l_@rot!}FZ08x6h?s+!t7jD`pFj}ZT?(ON_ zl1GXYluU$-rLv!?=O(@A=tlM@Tk?#pZhtC_*j#d$pK>e}qzQn~ zIt4v{Z&TH7p4Q`%p%Vv`UkEAN-Fc~?$%&b2gtX|Daw&yFis9vQOO{*HAE#(mzpR!E z{)0Rh3j$qy_X(2Q{FOsr{yd%uX4WOms%lw1%K)a9hJUuenx<8z)?qs~;?p)|$(g)p z)<3sl=;j|LhzcaeI9?qfV#`x`VfiSJ2Y(8MQ;I!9pun%@B&@>%LscXQ9<8c-8WD+j zM=Ata3$UXg;SW9zBs_2bW@Xg+uo0Y})Z|o8+^H5^eTkymurrp_c&oUD_J!BLYAGq{ zP6(TW0Z9@Q)F0EJQ9&?f+mfyTGH)*}w!RiZJk3y{#7t(PFYPn&yKY`R>|yK#XG*NS zCnAC7A$W@w$(AASS;{VN3_~9Jp|grfvP4S`t=3Tw{ju_L8z2FB?*TBSrq1}=#yd?j^{!YHyWV2Z8W=DyQ~Ddh^-&w z<9Qoun4<_ASzJsl$hA@Pm}HhyA0+YJeG{cBcaO|v#oI3W?l(8uQFlC{ZqHholus4- zScG5aN*(T9!F}jCzW#817#rK{;&!X$y9d?|{rLXf-XB>py7Qz$VoIIiMS9PEJEIPQ zwtejwCsnIGX@`_R!r3gQK9kKu z-|dfp|9H0U%n1xi)m`lCZJ~T~xl00A>Ab-YDuYE1 zR#0M^gcFRx!BRF7TzhkAXoj~5pD{ZjpgitFd9=?@Db zO2w(h&rk-c%s_b{<+`?LkY|pE1~=sUbWx_}sN0+?PHpV1$#*TaW-d_n&`@b&V6}D8 zp38@@mV%@+>}OLVQX!_phr0>m2?(>6BR{{iuIT4?PWo?Zs$;uqEF5V>PMZo5Lth;9S&+joTW31 z$G#Ic-*yO-xwS;m({cBi$vcPIhd=w$MVz{UeAse77<|O}ynJ#NBj@h=<8C%C&ODf;VnwB(;nsrNfohQ`!2L8&jK`(t z6g`R>6VBoEbTVI|q}e3OYGleG=q9U-y5g=V7E_=$0lRKV9zZ&SE$jRFf)cMF>0S?J 
z2|5wv%l32xO(A1IP1ZIJ!OY<6a3yG_!L%yK+9PCT@L5nOW$UO#dnUal41-v*dsUcv=rA)gr_FnTJNp+YVI+MrVLg+1^av0+Ov*B`~QHKodLpKjS~TmJgi zsYnY~L#_Y4U`FXiGC&Y6HC3hDveuu^Gc^u8f7H28?M-lw>rGV8zFbZPN0mx&-Xfm2Y*gbFQ~iSEO$n zXf&~Cg0}-p;YgUp+LST|64dowci{0RqYjJQjN}IGQNtHNDJF};dc3#Bp@V`TSDbG` zd}URRthJx`hN$Dv2X}g?r|*8ZX+wM_(aYe=ztzPYel|B{jvg`BOc+n{%%L`3YPkyx z_kv-YIs5s_jq>K5dx~Qc24b5GxX=@MHiL<+@9pzE*MR{+ou5qjPmYC6vpi7r96pHU zn~g+}a}ZR-0)Dapv3yVyE1xKLF(8_bu_mVm)R+!yR_8MnqLjiYgFSuB z#AYR4R5N!@CA|6#Tivrr5zIcdfA#F8go_?z=_JV_LNlvdNgK+yilqlY;R-8E(nFjG zU}g-2^r<+L0n%yaQW>fUd3i&zGuO>o-Dw1do-6vO^KS+1oC|-6Mssg^d`VB|)iar{ zD)jhX8*^s%U)Z@4Tm5|BVST9Hlk0RbbTUvYF!Mw;t!`+FgJ%tcQf?ed7l}ZYxvd_d z3D(PpN^#lvz3k1}b&e}Yt6CU$HcM$l~%S+zRzefkS6EjCu4p{H|2nSb`` zS2k6x0$3es{n|!#jkOT=eK$iv6i-6;SO?c!ddHjARZJDCJF6|RqL`(}HU{VM9#(uG zWMO<|GnEuX%>BNqMnsAVJ{5%nJLuEt6oV`oa}hr~@+Et$)ZdFsK_HwdGLL#aGR(5l zmX__xH+IU?I^))~O?dmfS{gSmTKUkQg_5d9wD}<4sz0R_L8KhZgMOX^gV#?>q@cwr z{KeD0pqZB*_*#D3*)(4|^YoB46?kd`0N7zW9JT=rEl+qhHyOmN|CU~4qzc31ttp*nY6Jb~Tbc=0+RR#rW`d@~9P%^s~ z2=AH%vz(}LnrbU12}isKW(-RLEwt6D+1AJWG)w zB}Xpcz8B-!KdT@f=Yk7@mb{zX4dZ-tHMR&6<25?--a6!6_|dsE9Lk(BMT70>{ z4%wXWmx-q&U)IiU4tv&f1#>7S^ntdUQg@vn)+?hLRuHFE__5WIB&8cpp|*KLIqo66AI9xqB{y~MLhISF8R zZhb(;da18;(AE66(mt`N`*k_Q8K_IY4GVJ=A)L{`B0*ZfVp^_MjBuP~r>VH1U0G4f zNbf^RzS0vll}cI7X6r(~u2b-^DO*oL`&7A zs!+6uMyrkcAz?{Pw8Id?1G`|+Hnm)~&x&b+Ld9F4VRKnK;ZD`@`gqN5_h)vj(r0{s za#x=fG!M45pj37y=ye`n4@SaIl&Gg3d%DQGWcygi`JxU4Ll^ zORVN+zHCwzYDEkwQDarQL=vskcIG~s+^O;yJ0HndV5zmcP}ys{9-Hj+>@xbE8$S;Y zfeRX?X5TU^FwhSjsUPbSHR8nVud4ynOkuH$#bt9} zwpT9UsklC)3+KpPof5@y30wP3~*DN*ijI$KZL~-z&C7yqw&^ zW!|5xEMPZeTJ{Nn*y5F%1aPm6(%F?}nOTy~qNU5|@1!BEeT_dM0kb&x&peqpA0bl%I4LVvPr_I1ucaL7}|**uU%D?s(kYL;H}c=n>$?duv>R)Jlu<0iYy zzp7NkRt1szHjH7>fIOIaHrvB3x;t|0EdAG}^@z!W$8`)w8nZ?i@+{}!ocF0q9a(v? 
zFwaWtaIbiYXf;}@^bKn}I^B)eZWtebf~^~}8y@kKtd*;D?5j}I8XuTGUD%egB&}R) zl}{;{U-f}JjU9o-EoN0p`V4_$1B$=%XFZy|%y&vDF+#TlAlxk2WtxsLXqsxKfEjse z7@Nxny$WNLgJ(>3SY0A4jaxY=1i z{jEYYo|+++bh35*^~0XZ(#bfFsm1KWp1^^dvmI(6sz2;e=(koJtDN>GM;i=L8^5h2 z25YQ5?3AKFg@WbvnNO3ClE9j?Vvp@aR%{9Kb4h7ts}#8(+Z)}2pDq)TC$_FI?bFx{ z(~TLmW2``R6L?5lGpt`by9(@K$>7dX&OM~Lk06VZL86{stzfKW2>4S7dpsZUT>iCv zxwduz&GA{2?uSv0Gc)m}BewC>ln~kM_0Xj|Pj-PVzgm$%XM}?k-pb|b3g_wSJ;sIL z+S9m5zMD*q?XDQ4nX}Vt@yO>ZkBSjDwd$MCE%@K|ezL=gl2pabh zqX<_)&3rz}L(JM32a%IlQzs#6=XE?nM`b0SwceM9!VtskQ8WUDL+%VudkktMSlCa! z-y?3 zIr4e-^mXmL&NjcsIr%|SXvo@@=*neIvT2NW7f$GTKxNaHr-)K~1HSE4A8fpn*k*g6 z!j;olrhQ?EGZbg0--jN*G@o6Ad}oiDhG>sLGz>*W%|#Azsz!Yyms=hya4GaSL(|Gcb2}D2JC%isst8OqCZJV*QCH1XvN%(VR*(^r zQY*I9K?Df`Jf6Jq_C$mqs*&Pqn3dqL;?yFtQ(_{JYdF0WV1Xjx(po9^gbr^wE^M-l}e@5$Y9}+gpfwli}^JoMq$@7fEbhH zIeNqZ6}6&FnaoL>Y8Zo8ElGh$8sa8Vw8Z{>8g>R|Q+R7|k;BmeELELOXV=nrQeC8B zjl~awd6yFrq7k4NU{RRn;IplYj-;Vel~H5HSF4egKH;D_fje5gg7nFFiy@_NvWjOV zQj9dZDjily2Ir%h^G90AJCdCaG8cHQ=+$p;Y{tbWvl;Ou0VKq$LUroI!7ul)l2n>4 zsW8!2^G1zJTL*(lm{0Rs>!#*%$PR~7PpVTEw5C91nlGlo#9p8?`%u*BgvsI$Dyfq5 zNqH<(=-gXr*$H+H_Zf*gLut?=?YBIHJiAB_}Ya}uqCO`N8g?{eo&iCTT0ruXvM;S1w2}^)HBFJ-c44NmIs)Z>qTqGJ^9U3EF z;pYlN*%YP^GUY}dsL7aWoGir8Fjdm(8IsV4b}Zh!M_e;K0&Zs>q>^6l-qnwo2V(Ea0bWsKt_IB(eYn?e_yQ_t)x#srcN zUtnxLVbaMrJapTomd69uA3S?z{t!5S&I!G*9}`o4`f?~A`|0H5yT9t#7}v0RK6;X< z{N}~-zug;|dgY9xsJ{qDyvjlBa=w#Ux>ZfG`jT|RyJ1Dg8Z zO0Sf!f2zw%Klps^)EmCXue|whIDc^|Y4E(2)l{c$cWo#|N#e)5mY==(yIb#^e2vfd zBV+L5k1rHQ5(Uj?_`CP*yu0PA%4=IcJn`lPG-gRvdtZHdy4U<8C_Mv?XrwQ;VdpD9 zz1y$RdT;#O73Ylil;r$r-veO!@!90HFP0Ae;FYxc%w(s2e%hZ>o&KaHGW^jDcIol( zr^fr&kF75M;z-*;7J$`z^9K+`R|cjPPH(^eypGIpwpCX6EUwl@p5{c}-2^;c2*j zzL2HCcIj$LO~70sI6$qoOVLFY`uzvB-AqZluI@|Ga)K}xe#yJ7vC2=71+1Av6s$Kf5VNeeUeg6YA(vALpwI~KLhdX z|KHwshR0Q04R7zgH*L|X_h#*tu8Ul;v21Kp10>jBQyoLF$(N89%#u(-H{F;TFc@QF z4DQ`lvt;$&R?h|B=GrYLu4Rx>=37?ZH2XHNaCl3 zsdg6G6HvO&4TcTSJ~$pCO zQ7XmoEt}+03iA?PhuSr*Xv*woTf5_sh(RO%tsq0$Ev{RT<@c5(i^L_ePF(3CZDwvE 
z<+n9e;mSO_ODvc!r%bGnW@Xv2tqB&d0^%_hEKwjEub`;O($*N&ha?%9*<=hyoTa4q zF)+7AC_>(m>kV>3Hv6PCoOHf*nJzymw4b#P_Y9@zX?+vlI$E-rh}fr{EbPMgJIq!= zD{D06@tz`dnmEIalLsy%?L?RB6k(`GFw%b9ba1MT)9XtgzTD3fXhy$itLO9HwY2uu zd%C_Q!jYCNa&%I9Qo28(Nbr{s)Q*ZCUxrB)UMbGzaKC8!{OzLmwE|jHbs#OtFp13* z8h148U6n;BYq5{o`)!}e;_pdgET1qjOzZDVpV{rBI^98uDC8VS6ssay$_#s^q@OS> zd|O+k%3~}QMn6w)SuEu4NiQU_sz2-;F&|5nsBXwl>6L|kW)43!{&wbhQ{lyvST`*u4QLNz|uHls0ZMBru8*arh*cFmf{6qDE8%yLh-MXPUk@eL(Jv=EUsL z{$Qm#m4Ois40J2FLb*$8nXEij*Q0xnM@@_5;|iTB>Oa1zx|q{mByJ`tdVRj~X}XB~ zEskoe#18O1g$XYIyOkB5Ktq=6(|Pz4P7S-!Fq!i%z0L&1?b9QDO=gm*h!MqGy|zFY zyyB~xa)#SQUan<|PW01+Rk2dzu~U5&nmmc}q>gZzk-e@Vtj&))sIlaTI9^Giko!tv z#jhMzHc7mhHsTaPI$Alc5)aj1%0anv#lQyX~jJS9SqYwP3 zI-_-PIp01-@fAwo8$(FvrwrC2M$X2^LhO|MgwURwpI@4L`*GeWe1u7_OO4`>IP+(c zXD7a!M=Fz5k=ctU@r^~cC9WniW74BDO7giz7Jk~VRTJ=K2y4uS`EIY0uUj!D717ch zi`)gd{gmOw-D*XI;HH(CV>w6~{Q|BqpL>FodAgzKa0Z@s84?uqvF_z(;^TBybGN^4 z8C8>*`6MrDrm*Smtip_up3r#kVM@e5Zz?)})On%!Sh0T)p(dHigesBlbw%(Ll82EH z>rn|lFgh{esiz!wiotk|F>Aoe_|HWv7EC#O2IhbPEY z+WOIeyCy7L#m_Z&cJ_~Wy`&->E-p4$IVw%m5V1`(AL>(cr9okeFXTH*{nAVC&nt*E*v zukTPHLml9S6xJZCk>9EQ);~SX+TdWdpNWO~mMWwyi~b-VKNB5M6ZxVM?2Iv_NNKW+ ztU*TBk-g(mSz%&|uPUT?+85H(tC~v>ReM8W;+O83Mq%|xhxLp}!c&V$-;UWXr5EAr z%}&QSOJwFcM3nmdrp_;75@Lb1Q6=cMC+$ufdpfMfUt(WxLXX=@BOF@Y!fcHju5RZycNR82Kj-n_urOeM~8e^+4 zlgwbAtjwC~cceEAd@veUJ6423g@Q?fA~B&3<+*6K1U;*}Vtw8UNmkpr^7tQ{Dp)+{ z%7uCTA?ebJ0O5h|x(E^Lwt~xJV@ycpjg2UH0&TZ}K!{!EvL$WNp{{~_3|Wv64<_)- zBLW#3U@KR5NnjP+6Ear1!afhTT7Z$`Pn{aNav0M=8V34S-1nvU0NQGAq=XFkZ zv{<#%CkApj*H7Kr7p(A)4UAbbrKg}VYto73~SoL zaKuy5s)+V=ep;-*VN0edO*9yL#8FN|}}f_tT+Mxrjor0M`|7={0)5 zSN|;-5+dwf8_FvunHYh{wCOA@Mi&MGwev6uA}LrfTE)wqSiRLVZv?MwOT;o+~p zL0n-G+enz=U5LzRN%GP{=H36l0%MWHgP~dLkmdjUwJ>)Y`ir3I>|)N zqQs`ZJL}3UFA0<;?G4zzga=O?HnD6NOX@Oq-kqkYqE0PKF5o=CjPOW(^mBgjh;N;6(TTYxzoU(1i(ykS7d_&o;zs?xA=j}y zUo#1J%*DPkLQ7%}Vd{qpT_Z`t#ggeVJ;`oo<4B33z`{7Yfv`H$nZdW`e^<9_X|9^5 zEu1;sf}LsR+5wRvH}?{ zsYJwvbFiZd2)x0;=HZ)42hs)t!z}v%#z^#@qe!cquChC>=rfGe1#x>wVAaNO!t{k? 
z#)yc?<;F?4bp%gV-0J07(xtp%3oUA~Mx-+GWyC7N@$n;hzC7WT@tlcagIR=S+6=r2 zdC}8WEOG52H729~-u;@o7=ts2jIkM)wR_Wg)I&WpBK~rJse2_c+A>xno~Grykw8im zC^&0Siv9ky^kC^3nNt2~dIy0hYNqk{)z0q42y{h0P8lEOiEhp#$m-WC#*Sy6#%D#{ zsvy1)7h)H2+eQYdMeZ{1RbN0)0e3)8I}xM=_zRjbBFKsm@L4p2AaWamgxMI(U4GzK z2&9)-5M(oO?sX&ZPqY}&HV+^OANUdH=MwHSMB12)TJe8bUAj`9)^mKKhg4p5`jGht=$ldl1cln_86&_A!>`4=47aH%9>Jfrwi#;8ZMnz+KGs+mRpYT8)X8_GR|j3>)Fq@F50Op|b1nNfCK#z=`UXSle~NOcbxsNU6D;8W>v#8w=t zE^b!!$1Ce)q)J4VaLSl0df`MWHE0Zo-Sc_YtnJ>8Dc`D&m>g2HDp$ zD`L=wxMC@tWPY>n(bJjptk}%JO6eDVwca(vrk^3XnR7fiCt z5i!2R!pBTRlrclbRGiTt7Dh*89>lH{=c*_^LRv_*Nu6#j?>DoJ3t(Oc8pY&Xp)iY}d({SnWP`nW!K`#HP~R#-X@OV33pj7)x)z zfAa2lJGZUmV)T6HrMjJnNmGHp5F67VN@Pjq0-JKN8`;Q>QOHl4~*rYJAje+7xwBEL-D~vv>^Y zSFRx^)pJ}wFESw{tavB=Fok?c%$!g}9lm0wnp0Sj8z?=*AI%mZrK^?;GZ&mAdlk2Z zGcE+mE8JtJ96S?vnTS&?rG5~*IitIFo}xHkJY$xx)i4%iV!I}w6Nv%nDBl~rt$(od z)4eS_k3Eq5XiK7Xs`;Sn_9l{T5yhCL`GbKLRq)pM<$^A9%LFDmp+$JFn{96CQHeJx zFVA=>+dYu&K)Q5Jd2&w%XF3N@^@*j!f_!6IF4KtTMmf89&M+@lq^*?m376ELTe9%& z3!+%A=+)FTfl99L(FS|5`n)b~{bXAJo5d%5X+8ipUA=CWx06Mz(p2Gjs`FwhqP5%N z3~y1S<+`=X(iHQ5pxKxJKsb;`dcS8JAUKXF(nPC{d@K(=RXRNAlPo>p| z(aJfmg+_!iH5((4-GOfneuB-&$=n#eTN|GC z3pxXqMH5j$MCc{D3XX^gj_TnrDQ??dez-bRHIoSDWk^D-gC!2`f`%`HUg0#IyKJfq zTq)^e-QD=56|TZYPc(6Az)^o@RM||l8wukB6CIaZ%X%@QrPZx&?gdjT?F;5*uR6>M zWA)^?J+D1HkC;tUe#+-bbwO%nfilH=EmM%L#!QMBcvpH*>r4y8-ELv@3bwD<&K;BZ zM`fx2-x?#R{plsRf+dyHqQH$QX)W7Ick;AUzCt}!MDb+g`6UiTB;Dw#T1U;w#qRk3~|9sTS${4hiYLCXA{v@3)~xa;Y~8*^m1TLQIrzC-BFCEb=tNREk0K62O+J-J7<<6lhXn&qT&F}z$P z!sOo6;@OuTuBe~#o&>I?jt+IdWz@g$@jFjvQKZw6`74*nl%u4XGp+xa_%i1TpL=B9 z$=45_y|Rzgb7uGJd#I}{)|-)BtT1MtxzKCq2u}5zIr0M;#7j(^Jv@c=2QV|S@K^QN zFSCb>4XwQPn5kq!aK>dA#IzpHw3p&mr`@JsNY55e)k#^+l#b+R9g{S{z1b+H6*#2C zdDy+0lC-qd>|?h!8hWNpGY3446U7twv~R{3$`i6yUS#qLc069j6$!gJSCnO=2w@;@ zNnFnxF!C7m6Kv!qs&2L9M%oj{mx1?PDhu_uMN z)2w>6Iapz{5~aO8BwN_SdLoE%tWj`C-OgOxQXkHzr0SF7oB~>HbxzSn-dhZbaUD{0 zkMC6Zkc#y!OOu$4yGQLEkr2MEj&{6j&~cCv>~m5EQ+d*YoV8rOp9VCX+wj{fn@q6;!*cU3w%+f$}7u&cq_4j0V 
ztG_Zb#SiN9+<1{9QcaKY>rZ2J_Eb4*Ma(60_9H^q{B)N(#d1=Z^io`vztI+pzK-uq zPRnUSqrSHNLqw-mN9oj>6Us_fHL~ihrJMv|C~TXGLL0CP4BV<4eR}xrd)gPYrOd z+quy5(3U&XpVc=5Pq?%Gwn;!Psyph(7pQ{NfX{W{Y@}z4YO2!1wl! zEt`vG{dEgKJ_f`3r?+p^*Eb8~p!o`bpMmr4K7a=RqP%~<>49zF149G27349{a&XSw z2v7mA7v#SN@P2?90MR;)!1pey6F36T%0zvE15ykR;1XcFt2$sP9FRuivu&^<$Q%vo zUz`gN#(rF;@rG1+^pECuD$mpg+EE=Z|!F9OPdsAB_W; z<%8Bo`GH}EdGLq&ST#t`>2N2=2g#r4fYw2EKy8EC6ZPwi`c6jc0U%a@00MYb9@_un zf7%q4`%5~_^&uV5Tsv;)KnhwBfZ%pa)c320CN{bj)Ue z7GRhJFwp$~cL1RiK)3@iyba(e;PVJT)PDDaI~)o4R0FI5h{^zfUj_nHUtn4U)u|TH zp?ad_s9rk(&H1(KzD6>P#@eVfTCGms%3v&AOrF3 zTmJAE+M-Qci|^gKX~&K&+ln`A*?#|~N4ESRw+7IC^S#_05MK!povQ$-n?Zou8Rd=Y zh++-E_x}g-(*fNz@_)bSPj@}?@WWdl*mhsp=DW&@*KGRJI*`1|e~$MY{~B=jhuUFI zA9UUWpw@$6P9JnEqF4j40MP%MJ^l*luF>Znu;R`tvSG{C$KanUSn|N8?Ta^W{@F5g zy)Ik!vqj%m1BTwT`Ib$OKCtQTty^w*r1;S-+aG@H(R;Sc>W{YbTK!AFedM+6Mb|d~ zKe9i%R-mW`r~vf;uDu6uIXwU0+Q0Ot_P>_@g@4fpT|)r;$o}ZsfT9}Uf&YO%KX3n1 zV5gt8e`)bA+rPB<7w!MgYQJj#YxV!e{bT#1>okBL*&oeA5ncP<2lW5W{@>mcgVliU znql>$@&AMMe;EJQ@~`pySfB6LXX+Wib8h^j^BYC9tN_senm&I4bl2!}b^QN=^?%v+ zzi9cnD!*#?YxT|nckFZHei_gY?GpghS`f^&6W#Bih|U=Sphw$@t_Ns(ZrwxU`lp_P zQCsY|cWcof9@_*~y~n{S_k(?}x}k>YfQhtpWLte{r^A*x4ol#ZP}a9t%R4L`?j5i z+KnGWryt)7MJV{H_q$$%{&MFIsN&&+&|Nou3Rz$M7<#N^7qq?Epfff<3%&8oz0mqC zUAhpt3)+>bhu*E*4Jif&Aok-AK&POs(C$5x;yFdZzhB=s{qEYv$EI_XNNE=qdZf z1rY3Me(ljeyN;lC|0DQ56I}-~z}!U7ApoclhzG#;kmx+6{AZrd8s&d`Py-CGEU^~W z%jd($9mVkVFBZTvd)2UM`x$uG6YJr>EUbXHUfBe%Gu{cOZ+f8mH=AnV-QF^I`Fpol zcTsME52>z)O)H;+|FH0W_(9_~_@11z@b>;yux?`sT=v_?VRz$0Fm(AoIQ^aVFi!Y! 
z^|Nm*fwA{w!4GQRsisW44ZpUn8V-hk3(srW1CyS~fybP;dbsVsHo|;N z2K?6cU2w8y3%r)M2A;y-0Pk6|1Ww*~GkoOF#_BZbeE7{jG2lHnT!f$F6~POXPr*;I z)>em~)WCh0m%;U?HL&`HyWjze4u1ZIZ7}z84hU8b*D zckyE^RK5<;J-2p7m$?Q9ZL`p!?J+hqL!8tdx!Ixn`pQM!7s+}ZDM^E#e57BuZaoot z?uIkEKR=Fv9{DsKI{x7yo%D=NckfZNZu;DmPH;<;u4A2BH?Ld`DL+r>a*uK#*$FXZ zs4j(`zHU&*Jx+n1d30F!^Xu?G_wqk;&icPP0?e+%XuDJ3e^{a8Io44 zFOWaG4*zGI%o;@vLdU{e;n)7O7QW^Azrh>-_B8yvC$_@xK+nK&+N<#G6>q=~i|&K( z`Q#7qZ?r4m&+l6WuaUh1zq#-Zn7#IS`1Er%@E`B^E4*pt=dkLPa`?T}!*IvML+}kl z_ruS%t%lz@xC72@`4m2!t%n!+-+=E|?t%5?N8v2>Z{f8~FT(HMde<+k19Rix>N_Gb87STVYd~e}{$#g~_{0uf`KWMfsoz9v3L!3P3=Z{kxifjf}ZFH6UdB4wM+>iPjT<6YCFmw`j@L|J8}|tpK Date: Fri, 17 Sep 2021 10:59:07 -0700 Subject: [PATCH 06/22] Formatting fixed. --- tests/micro/zephyr/test_zephyr_aot.py | 9 ++++----- tests/micro/zephyr/test_zephyr_armv7m.py | 22 +++++++++++++--------- 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/tests/micro/zephyr/test_zephyr_aot.py b/tests/micro/zephyr/test_zephyr_aot.py index 6d2e7e5f12b9..84e9b1ff4b64 100644 --- a/tests/micro/zephyr/test_zephyr_aot.py +++ b/tests/micro/zephyr/test_zephyr_aot.py @@ -15,11 +15,9 @@ # specific language governing permissions and limitations # under the License. 
-import io import logging import os import sys -import logging import pathlib import tarfile import tempfile @@ -32,14 +30,15 @@ import tvm.micro from tvm.micro.project_api import server import tvm.testing -import tvm.relay as relay +from tvm import relay -from tvm.contrib import utils from tvm.contrib.download import download_testdata from tvm.micro.interface_api import generate_c_interface_header import conftest -from test_utils import * +from test_utils import create_header_file +from test_utils import build_project +from test_utils import get_message _LOG = logging.getLogger(__name__) diff --git a/tests/micro/zephyr/test_zephyr_armv7m.py b/tests/micro/zephyr/test_zephyr_armv7m.py index 4fe00f785207..218993f32434 100644 --- a/tests/micro/zephyr/test_zephyr_armv7m.py +++ b/tests/micro/zephyr/test_zephyr_armv7m.py @@ -18,30 +18,39 @@ import logging import os import pathlib -import typing import sys -import logging import tarfile import tempfile +from urllib.request import urlopen, urlretrieve +from urllib.error import HTTPError +import json + import pytest import numpy as np +import requests + +from test_utils import create_header_file +from test_utils import build_project +from test_utils import get_message + import tvm import tvm.rpc import tvm.micro import tvm.testing -import tvm.relay as relay +from tvm import relay from tvm.contrib.download import download_testdata from tvm.micro.interface_api import generate_c_interface_header import conftest -from test_utils import * + _LOG = logging.getLogger(__name__) logging.basicConfig(level=logging.INFO) + def _open_tflite_model(): # Import TFLite model @@ -99,11 +108,6 @@ def _apply_desired_layout_no_simd(relay_mod): def _loadCMSIS(temp_dir): - import os - from urllib.request import urlopen, urlretrieve - from urllib.error import HTTPError - import json - import requests REPO_PATH = "ARM-software/CMSIS_5" BRANCH = "master" API_PATH_URL = f"https://api.github.com/repos/{REPO_PATH}/git/trees" From 
4cdf12c4fb795d476554790497c37191d09c9678 Mon Sep 17 00:00:00 2001 From: Sergey Smirnov Date: Fri, 17 Sep 2021 11:03:17 -0700 Subject: [PATCH 07/22] Formatting fixed2. --- tests/micro/zephyr/test_zephyr_aot.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/micro/zephyr/test_zephyr_aot.py b/tests/micro/zephyr/test_zephyr_aot.py index 84e9b1ff4b64..7cb865d53fbe 100644 --- a/tests/micro/zephyr/test_zephyr_aot.py +++ b/tests/micro/zephyr/test_zephyr_aot.py @@ -42,6 +42,7 @@ _LOG = logging.getLogger(__name__) + @tvm.testing.requires_micro def test_tflite(temp_dir, board, west_cmd, tvm_debug): """Testing a TFLite model.""" From 5288a521aef141170d5e93b4205566bdcd038ab4 Mon Sep 17 00:00:00 2001 From: Sergey Smirnov Date: Fri, 17 Sep 2021 11:17:09 -0700 Subject: [PATCH 08/22] Formatting fixed3. --- .../arm_cpu/cortex_m7/conv2d/direct_simd.py | 7 +++- tests/micro/zephyr/test_utils.py | 1 + tests/micro/zephyr/test_zephyr_armv7m.py | 42 ++++++++++++++----- 3 files changed, 37 insertions(+), 13 deletions(-) diff --git a/python/tvm/topi/arm_cpu/cortex_m7/conv2d/direct_simd.py b/python/tvm/topi/arm_cpu/cortex_m7/conv2d/direct_simd.py index 1842aeca431b..307312076a7e 100644 --- a/python/tvm/topi/arm_cpu/cortex_m7/conv2d/direct_simd.py +++ b/python/tvm/topi/arm_cpu/cortex_m7/conv2d/direct_simd.py @@ -114,9 +114,12 @@ def conv2d_direct_simd_compute(cfg, data, kernel, strides, padding, dilation, ou owo, owi = cfg.define_split("tile_ow", ow, policy="factors", num_outputs=2) cio, cii = cfg.define_split( - "tile_ci", ci, policy="factors", num_outputs=2, + "tile_ci", + ci, + policy="factors", + num_outputs=2, # TODO: check case with in_channels.value % 4 != 0 with AutoTVM - filter=None if cfg.is_fallback else lambda x: x.size[-1] % 4 == 0 + filter=None if cfg.is_fallback else lambda x: x.size[-1] % 4 == 0, ) coo, coi = cfg.define_split("tile_co", co, policy="factors", num_outputs=2) diff --git a/tests/micro/zephyr/test_utils.py b/tests/micro/zephyr/test_utils.py index 
09acc61cad83..8e6aba36da80 100644 --- a/tests/micro/zephyr/test_utils.py +++ b/tests/micro/zephyr/test_utils.py @@ -24,6 +24,7 @@ import tvm.micro + def build_project(temp_dir, zephyr_board, west_cmd, mod, build_config, extra_files_tar=None): template_project_dir = ( pathlib.Path(__file__).parent diff --git a/tests/micro/zephyr/test_zephyr_armv7m.py b/tests/micro/zephyr/test_zephyr_armv7m.py index 218993f32434..6c5c968ed724 100644 --- a/tests/micro/zephyr/test_zephyr_armv7m.py +++ b/tests/micro/zephyr/test_zephyr_armv7m.py @@ -72,6 +72,7 @@ def _open_tflite_model(): return relay_mod, params + def _get_test_data(testdata_dir): from PIL import Image @@ -90,18 +91,22 @@ def _get_test_data(testdata_dir): def _apply_desired_layout_simd(relay_mod): - desired_layouts = {'qnn.conv2d': ['NHWC', 'HWOI'], 'nn.conv2d': ['NHWC', 'HWOI']} + desired_layouts = {"qnn.conv2d": ["NHWC", "HWOI"], "nn.conv2d": ["NHWC", "HWOI"]} - seq = tvm.transform.Sequential([relay.transform.RemoveUnusedFunctions(), relay.transform.ConvertLayout(desired_layouts)]) + seq = tvm.transform.Sequential( + [relay.transform.RemoveUnusedFunctions(), relay.transform.ConvertLayout(desired_layouts)] + ) with tvm.transform.PassContext(opt_level=3): return seq(relay_mod) def _apply_desired_layout_no_simd(relay_mod): - desired_layouts = {'qnn.conv2d': ['NHWC', 'HWIO'], 'nn.conv2d': ['NHWC', 'HWIO']} + desired_layouts = {"qnn.conv2d": ["NHWC", "HWIO"], "nn.conv2d": ["NHWC", "HWIO"]} - seq = tvm.transform.Sequential([relay.transform.RemoveUnusedFunctions(), relay.transform.ConvertLayout(desired_layouts)]) + seq = tvm.transform.Sequential( + [relay.transform.RemoveUnusedFunctions(), relay.transform.ConvertLayout(desired_layouts)] + ) with tvm.transform.PassContext(opt_level=3): return seq(relay_mod) @@ -120,7 +125,7 @@ def _loadCMSIS(temp_dir): include_trees = {} for file in res["tree"]: - if (file["path"] in {"CMSIS/DSP/Include", "CMSIS/DSP/Include/dsp", "CMSIS/NN/Include"}): + if file["path"] in 
{"CMSIS/DSP/Include", "CMSIS/DSP/Include/dsp", "CMSIS/NN/Include"}: include_trees.update({file["path"]: file["sha"]}) for path, sha in include_trees.items(): @@ -131,7 +136,7 @@ def _loadCMSIS(temp_dir): temp_path = f"{temp_dir}/dsp" if not os.path.isdir(temp_path): os.makedirs(temp_path) - for item in content['tree']: + for item in content["tree"]: if item["type"] == "blob": file_name = item["path"] file_url = f"{RAW_PATH_URL}/{path}/{file_name}" @@ -157,7 +162,9 @@ def _generate_project(temp_dir, board, west_cmd, lowered, build_config, sample, tf.add(header_path, arcname=os.path.relpath(header_path, tar_temp_dir)) create_header_file("input_data", sample, "include", tf) - create_header_file("output_data", np.zeros(shape=output_shape, dtype="float32"), "include", tf) + create_header_file( + "output_data", np.zeros(shape=output_shape, dtype="float32"), "include", tf + ) project, _ = build_project( temp_dir, @@ -173,7 +180,9 @@ def _generate_project(temp_dir, board, west_cmd, lowered, build_config, sample, def _run_model(temp_dir, board, west_cmd, lowered, build_config, sample, output_shape): - project = _generate_project(temp_dir, board, west_cmd, lowered, build_config, sample, output_shape) + project = _generate_project( + temp_dir, board, west_cmd, lowered, build_config, sample, output_shape + ) project.flash() @@ -219,7 +228,14 @@ def test_armv7m_intrinsic(temp_dir, board, west_cmd, tvm_debug): relay_mod_no_simd = _apply_desired_layout_no_simd(relay_mod) target = tvm.target.target.micro( - model, options=["-keys=arm_cpu,cpu", "-link-params=1", "--executor=aot", "--unpacked-api=1", "--interface-api=c"] + model, + options=[ + "-keys=arm_cpu,cpu", + "-link-params=1", + "--executor=aot", + "--unpacked-api=1", + "--interface-api=c", + ], ) temp_dir_simd = temp_dir / "simd" @@ -231,8 +247,12 @@ def test_armv7m_intrinsic(temp_dir, board, west_cmd, tvm_debug): with tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}): lowered_simd = 
relay.build(relay_mod_simd, target, params=params) lowered_no_simd = relay.build(relay_mod_no_simd, target, params=params) - result_simd, time_simd = _run_model(temp_dir_simd, board, west_cmd, lowered_simd, build_config, sample, output_shape) - result_no_simd, time_no_simd = _run_model(temp_dir_no_simd, board, west_cmd, lowered_no_simd, build_config, sample, output_shape) + result_simd, time_simd = _run_model( + temp_dir_simd, board, west_cmd, lowered_simd, build_config, sample, output_shape + ) + result_no_simd, time_no_simd = _run_model( + temp_dir_no_simd, board, west_cmd, lowered_no_simd, build_config, sample, output_shape + ) assert result_no_simd == result_simd assert time_no_simd > time_simd From f844d74dbe1463cb993c4e65ade987a718258309 Mon Sep 17 00:00:00 2001 From: Sergey Smirnov Date: Fri, 17 Sep 2021 11:24:47 -0700 Subject: [PATCH 09/22] Formatting fixed4. --- tests/micro/zephyr/test_zephyr_armv7m.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/micro/zephyr/test_zephyr_armv7m.py b/tests/micro/zephyr/test_zephyr_armv7m.py index 6c5c968ed724..7f3cbf647722 100644 --- a/tests/micro/zephyr/test_zephyr_armv7m.py +++ b/tests/micro/zephyr/test_zephyr_armv7m.py @@ -100,6 +100,7 @@ def _apply_desired_layout_simd(relay_mod): with tvm.transform.PassContext(opt_level=3): return seq(relay_mod) + def _apply_desired_layout_no_simd(relay_mod): desired_layouts = {"qnn.conv2d": ["NHWC", "HWIO"], "nn.conv2d": ["NHWC", "HWIO"]} From 47e3db21ed28dbc27580bf06efc04eb617c66ee0 Mon Sep 17 00:00:00 2001 From: Sergey Smirnov Date: Fri, 17 Sep 2021 11:31:39 -0700 Subject: [PATCH 10/22] Formatting fixed5. 
--- python/tvm/topi/arm_cpu/cortex_m7/micro_kernel/gemm.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/tvm/topi/arm_cpu/cortex_m7/micro_kernel/gemm.py b/python/tvm/topi/arm_cpu/cortex_m7/micro_kernel/gemm.py index fbfc3e65bb39..9a00fe272087 100644 --- a/python/tvm/topi/arm_cpu/cortex_m7/micro_kernel/gemm.py +++ b/python/tvm/topi/arm_cpu/cortex_m7/micro_kernel/gemm.py @@ -47,7 +47,7 @@ def intrin_gemm_MxKxN(M, K, N, in_dtype, out_dtype): if isinstance(N, tvm.tir.IntImm): N = N.value # TODO(weberlo, areusch): support more dtypes? - assert in_dtype == "int8" or in_dtype == "int16" + assert in_dtype in ("int8", "int16") assert out_dtype == "int32" A = te.placeholder((M, K), name="a", dtype=in_dtype) B = te.placeholder((N, K), name="b", dtype=in_dtype) @@ -122,7 +122,7 @@ def _body(): def gemm_MxKxN_impl(M, K, N, uniq_id): """Emit C code for gemm impl.""" # TODO(weberlo, areusch): are there any SIMD tricks to zero out arrays quickly? - aa_pad_size = M * K + # aa_pad_size = M * K bb_pad_size = N * K # code reference: CMSIS-NN paper (https://arxiv.org/abs/1801.06601) cc_code = f""" From 84ac7669f532fa6371f4cf92efa7f05142c69631 Mon Sep 17 00:00:00 2001 From: Sergey Smirnov Date: Mon, 20 Sep 2021 04:01:13 -0700 Subject: [PATCH 11/22] Fixed test time result checking. 
--- tests/micro/zephyr/test_zephyr_armv7m.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/micro/zephyr/test_zephyr_armv7m.py b/tests/micro/zephyr/test_zephyr_armv7m.py index 7f3cbf647722..7c25719941ed 100644 --- a/tests/micro/zephyr/test_zephyr_armv7m.py +++ b/tests/micro/zephyr/test_zephyr_armv7m.py @@ -255,8 +255,12 @@ def test_armv7m_intrinsic(temp_dir, board, west_cmd, tvm_debug): temp_dir_no_simd, board, west_cmd, lowered_no_simd, build_config, sample, output_shape ) - assert result_no_simd == result_simd - assert time_no_simd > time_simd + assert result_no_simd == result_simd == 2 + + if board not in [ + "mps2_an521", + ]: + assert time_no_simd > time_simd if __name__ == "__main__": From c26511319542532f9c78436f64f1e3e5a7313e02 Mon Sep 17 00:00:00 2001 From: Sergey Smirnov Date: Mon, 20 Sep 2021 09:42:19 -0700 Subject: [PATCH 12/22] Check rebuild. --- tests/micro/zephyr/test_zephyr_armv7m.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/micro/zephyr/test_zephyr_armv7m.py b/tests/micro/zephyr/test_zephyr_armv7m.py index 7c25719941ed..8c471ecedd81 100644 --- a/tests/micro/zephyr/test_zephyr_armv7m.py +++ b/tests/micro/zephyr/test_zephyr_armv7m.py @@ -225,6 +225,7 @@ def test_armv7m_intrinsic(temp_dir, board, west_cmd, tvm_debug): sample, output_shape = _get_test_data(testdata_dir) relay_mod_simd = _apply_desired_layout_simd(relay_mod) + # kernel layout "HWIO" is not supported by arm_cpu SIMD extension (see tvm\python\relay\op\strategy\arm_cpu.py) relay_mod_no_simd = _apply_desired_layout_no_simd(relay_mod) From 83a86ddacfbe78a6cec8ea6118ff834121ec7e15 Mon Sep 17 00:00:00 2001 From: Sergey Smirnov Date: Mon, 20 Sep 2021 09:58:51 -0700 Subject: [PATCH 13/22] Formatting fixed. 
--- tests/micro/zephyr/test_zephyr_armv7m.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/micro/zephyr/test_zephyr_armv7m.py b/tests/micro/zephyr/test_zephyr_armv7m.py index 8c471ecedd81..7c25719941ed 100644 --- a/tests/micro/zephyr/test_zephyr_armv7m.py +++ b/tests/micro/zephyr/test_zephyr_armv7m.py @@ -225,7 +225,6 @@ def test_armv7m_intrinsic(temp_dir, board, west_cmd, tvm_debug): sample, output_shape = _get_test_data(testdata_dir) relay_mod_simd = _apply_desired_layout_simd(relay_mod) - # kernel layout "HWIO" is not supported by arm_cpu SIMD extension (see tvm\python\relay\op\strategy\arm_cpu.py) relay_mod_no_simd = _apply_desired_layout_no_simd(relay_mod) From 3d8b944f8a65f21ccca17742ef6977f42b987af8 Mon Sep 17 00:00:00 2001 From: Sergey Smirnov Date: Mon, 20 Sep 2021 09:58:51 -0700 Subject: [PATCH 14/22] Formatting fixed. --- tests/micro/zephyr/test_zephyr_armv7m.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/micro/zephyr/test_zephyr_armv7m.py b/tests/micro/zephyr/test_zephyr_armv7m.py index 8c471ecedd81..7c25719941ed 100644 --- a/tests/micro/zephyr/test_zephyr_armv7m.py +++ b/tests/micro/zephyr/test_zephyr_armv7m.py @@ -225,7 +225,6 @@ def test_armv7m_intrinsic(temp_dir, board, west_cmd, tvm_debug): sample, output_shape = _get_test_data(testdata_dir) relay_mod_simd = _apply_desired_layout_simd(relay_mod) - # kernel layout "HWIO" is not supported by arm_cpu SIMD extension (see tvm\python\relay\op\strategy\arm_cpu.py) relay_mod_no_simd = _apply_desired_layout_no_simd(relay_mod) From 0ccb5a01495d02f521eea2af9efa6a3153c4f72b Mon Sep 17 00:00:00 2001 From: Sergey Smirnov Date: Thu, 23 Sep 2021 06:57:17 -0700 Subject: [PATCH 15/22] Add default DepthwiseConv2D schedule in NHWC layout for arm cpu --- python/tvm/relay/op/strategy/arm_cpu.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/python/tvm/relay/op/strategy/arm_cpu.py b/python/tvm/relay/op/strategy/arm_cpu.py index 005eae68b8b7..80cce9a4b57e 100644 
--- a/python/tvm/relay/op/strategy/arm_cpu.py +++ b/python/tvm/relay/op/strategy/arm_cpu.py @@ -197,11 +197,19 @@ def conv2d_strategy_arm_cpu(attrs, inputs, out_type, target): ) elif layout == "NHWC": assert kernel_layout == "HWOI" - strategy.add_implementation( - wrap_compute_conv2d(topi.arm_cpu.compute_depthwise_conv2d_nhwc), - wrap_topi_schedule(topi.arm_cpu.schedule_depthwise_conv2d_nhwc), - name="depthwise_conv2d_nhwc.arm_cpu", - ) + is_aarch64 = topi.arm_cpu.arm_utils.is_aarch64_arm() + if is_aarch64 or "+neon" in target.mattr: + strategy.add_implementation( + wrap_compute_conv2d(topi.arm_cpu.compute_depthwise_conv2d_nhwc), + wrap_topi_schedule(topi.arm_cpu.schedule_depthwise_conv2d_nhwc), + name="depthwise_conv2d_nhwc.arm_cpu", + ) + else: + strategy.add_implementation( + wrap_compute_conv2d(topi.nn.depthwise_conv2d_nhwc), + wrap_topi_schedule(topi.x86.schedule_depthwise_conv2d_nhwc), + name="depthwise_conv2d_nhwc.x86", + ) else: raise RuntimeError("Unsupported depthwise_conv2d layout {} for arm cpu".format(layout)) else: # group_conv2d From 80f936e8c56d13dcffb657e9f0042be52d01ff85 Mon Sep 17 00:00:00 2001 From: Sergey Smirnov Date: Mon, 27 Sep 2021 03:25:21 -0700 Subject: [PATCH 16/22] Fixed micro model library test. Checking size reduced to 16 bytes from 2466816. 
--- python/tvm/relay/op/strategy/x86.py | 2 +- tests/python/unittest/test_micro_model_library_format.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/python/tvm/relay/op/strategy/x86.py b/python/tvm/relay/op/strategy/x86.py index 553e90d975eb..8dacf262b9c4 100644 --- a/python/tvm/relay/op/strategy/x86.py +++ b/python/tvm/relay/op/strategy/x86.py @@ -215,7 +215,7 @@ def conv2d_strategy_cpu(attrs, inputs, out_type, target): strategy.add_implementation( wrap_compute_conv2d(topi.nn.depthwise_conv2d_nhwc), wrap_topi_schedule(topi.x86.schedule_depthwise_conv2d_nhwc), - name="depthwise_conv2d_nhwc.generic", + name="depthwise_conv2d_nhwc.x86", ) else: raise RuntimeError("Unsupported depthwise_conv2d layout {}".format(layout)) diff --git a/tests/python/unittest/test_micro_model_library_format.py b/tests/python/unittest/test_micro_model_library_format.py index 92c1174e728c..69e8a35bf1b8 100644 --- a/tests/python/unittest/test_micro_model_library_format.py +++ b/tests/python/unittest/test_micro_model_library_format.py @@ -318,11 +318,11 @@ def @main(%p0: Tensor[(1, 56, 56, 128), int16], %p1: Tensor[(3, 3, 128, 1), int1 "constants_size_bytes": 0, "device": 1, "io_size_bytes": 1207040, - "workspace_size_bytes": 2466816, + "workspace_size_bytes": 16, } ] assert metadata["memory"]["functions"]["operator_functions"][0]["workspace"] == [ - {"device": 1, "workspace_size_bytes": 2466816} + {"device": 1, "workspace_size_bytes": 16} ] assert ( "fused_nn_conv2d_add_fixed_point_multiply_clip_cast" From 11c688f3c000c1043ecddb403f55ca2ef89cee42 Mon Sep 17 00:00:00 2001 From: Sergey Smirnov Date: Tue, 28 Sep 2021 15:44:55 +0300 Subject: [PATCH 17/22] Revert "Merge branch 'update-arm-simd-intrinsic' of https://github.com/sergey-grovety/tvm into issue8717-x86-DwsConv2d-schedule" This reverts commit e927567058403bcc9e4fdc3d24828b3dcd6a661b, reversing changes made to 0ccb5a01495d02f521eea2af9efa6a3153c4f72b. 
--- .../zephyr/base-box/base_box_test.sh | 2 - python/tvm/relay/op/strategy/arm_cpu.py | 3 +- .../arm_cpu/cortex_m7/conv2d/direct_simd.py | 13 +- .../arm_cpu/cortex_m7/micro_kernel/gemm.py | 328 ++---------------- tests/micro/zephyr/test_utils.py | 114 ------ tests/micro/zephyr/test_zephyr_aot.py | 113 +++++- tests/micro/zephyr/test_zephyr_armv7m.py | 267 -------------- 7 files changed, 133 insertions(+), 707 deletions(-) delete mode 100644 tests/micro/zephyr/test_utils.py delete mode 100644 tests/micro/zephyr/test_zephyr_armv7m.py diff --git a/apps/microtvm/reference-vm/zephyr/base-box/base_box_test.sh b/apps/microtvm/reference-vm/zephyr/base-box/base_box_test.sh index 77980fdb49aa..2a023b520b01 100755 --- a/apps/microtvm/reference-vm/zephyr/base-box/base_box_test.sh +++ b/apps/microtvm/reference-vm/zephyr/base-box/base_box_test.sh @@ -37,5 +37,3 @@ if [ $board == "stm32f746xx" ]; then else pytest tests/micro/zephyr/test_zephyr_aot.py --zephyr-board=${board} fi - -pytest tests/micro/zephyr/test_zephyr_armv7m.py --zephyr-board=${board} diff --git a/python/tvm/relay/op/strategy/arm_cpu.py b/python/tvm/relay/op/strategy/arm_cpu.py index 9097bc7ce30c..80cce9a4b57e 100644 --- a/python/tvm/relay/op/strategy/arm_cpu.py +++ b/python/tvm/relay/op/strategy/arm_cpu.py @@ -127,7 +127,8 @@ def conv2d_strategy_arm_cpu(attrs, inputs, out_type, target): name="conv2d_hwcn.generic", ) elif layout == "NHWC": - if "SMLAD" in isa and kernel_layout == "HWOI": + channels = data.shape[3] + if "SMLAD" in isa and (channels % 4) == 0 and kernel_layout == "HWOI": strategy.add_implementation( wrap_compute_conv2d(topi.arm_cpu.conv2d_direct_simd), wrap_topi_schedule(topi.arm_cpu.schedule_conv2d_direct_simd), diff --git a/python/tvm/topi/arm_cpu/cortex_m7/conv2d/direct_simd.py b/python/tvm/topi/arm_cpu/cortex_m7/conv2d/direct_simd.py index 307312076a7e..988c3a99c059 100644 --- a/python/tvm/topi/arm_cpu/cortex_m7/conv2d/direct_simd.py +++ b/python/tvm/topi/arm_cpu/cortex_m7/conv2d/direct_simd.py 
@@ -112,14 +112,10 @@ def conv2d_direct_simd_compute(cfg, data, kernel, strides, padding, dilation, ou cfg.reduce_axis(in_channels.value), ) + assert in_channels.value % 4 == 0 owo, owi = cfg.define_split("tile_ow", ow, policy="factors", num_outputs=2) cio, cii = cfg.define_split( - "tile_ci", - ci, - policy="factors", - num_outputs=2, - # TODO: check case with in_channels.value % 4 != 0 with AutoTVM - filter=None if cfg.is_fallback else lambda x: x.size[-1] % 4 == 0, + "tile_ci", ci, policy="factors", num_outputs=2, filter=lambda x: x.size[-1] % 4 == 0 ) coo, coi = cfg.define_split("tile_co", co, policy="factors", num_outputs=2) @@ -138,11 +134,6 @@ def conv2d_direct_simd_compute(cfg, data, kernel, strides, padding, dilation, ou cfg.define_knob("auto_unroll_max_step", [0, 2, 4, 8, 16, 32]) cfg.define_knob("unroll_explicit", [0, 1]) - if cfg.is_fallback: - cfg.fallback_split("tile_ow", [-1, out_width.value]) - cfg.fallback_split("tile_ci", [-1, in_channels.value]) - cfg.fallback_split("tile_co", [-1, out_channels.value]) - return conv diff --git a/python/tvm/topi/arm_cpu/cortex_m7/micro_kernel/gemm.py b/python/tvm/topi/arm_cpu/cortex_m7/micro_kernel/gemm.py index 9a00fe272087..fb6f7a589525 100644 --- a/python/tvm/topi/arm_cpu/cortex_m7/micro_kernel/gemm.py +++ b/python/tvm/topi/arm_cpu/cortex_m7/micro_kernel/gemm.py @@ -46,8 +46,9 @@ def intrin_gemm_MxKxN(M, K, N, in_dtype, out_dtype): K = K.value if isinstance(N, tvm.tir.IntImm): N = N.value + assert K % 4 == 0 # TODO(weberlo, areusch): support more dtypes? 
- assert in_dtype in ("int8", "int16") + assert in_dtype == "int8" assert out_dtype == "int32" A = te.placeholder((M, K), name="a", dtype=in_dtype) B = te.placeholder((N, K), name="b", dtype=in_dtype) @@ -70,14 +71,13 @@ def intrin_gemm_MxKxN(M, K, N, in_dtype, out_dtype): def intrin_func(ins, outs): aa, bb = ins cc = outs[0] - gemm_func_prefix = "gemm" if in_dtype == "int8" else "gemm16" def _reduce_update(): ib = tvm.tir.ir_builder.create() ib.emit( tvm.tir.call_extern( "int32", - f"{gemm_func_prefix}_{M}x{K}x{N}_update_{uniq_id}", + f"gemm_{M}x{K}x{N}_update_{uniq_id}", aa.access_ptr("r"), bb.access_ptr("r"), cc.access_ptr("w"), @@ -102,7 +102,7 @@ def _body(): ib.emit( tvm.tir.call_extern( "int32", - f"{gemm_func_prefix}_{M}x{K}x{N}_body_{uniq_id}", + f"gemm_{M}x{K}x{N}_body_{uniq_id}", aa.access_ptr("r"), bb.access_ptr("r"), cc.access_ptr("w"), @@ -122,7 +122,7 @@ def _body(): def gemm_MxKxN_impl(M, K, N, uniq_id): """Emit C code for gemm impl.""" # TODO(weberlo, areusch): are there any SIMD tricks to zero out arrays quickly? 
- # aa_pad_size = M * K + aa_pad_size = M * K bb_pad_size = N * K # code reference: CMSIS-NN paper (https://arxiv.org/abs/1801.06601) cc_code = f""" @@ -132,270 +132,32 @@ def gemm_MxKxN_impl(M, K, N, uniq_id): #include #include -#ifdef __cplusplus -extern "C" -#endif -__STATIC_FORCEINLINE int32_t gemm_{M}x{N}_body_rest_{uniq_id}( - int K, - int8_t *aa, int8_t *bb, int32_t *cc, - int A_stride, int B_stride, int C_stride) {{ - int k_base = (K / 4) * 4; - switch ( K % 4 ) {{ - case 1: - for (int i = 0; i < {M}; i++) {{ - for (int j = 0; j < {N}; j++) {{ - int8_t *a_ptr = &aa[i * A_stride + k_base]; - int8_t *b_ptr = &bb[j * B_stride + k_base]; - cc[i * C_stride + j] = (int32_t) a_ptr[0] * (int32_t) b_ptr[0]; - }} - }} - break; - case 2: - for (int i = 0; i < {M}; i++) {{ - for (int j = 0; j < {N}; j++) {{ - int8_t *a_ptr = &aa[i * A_stride + k_base]; - int8_t *b_ptr = &bb[j * B_stride + k_base]; - cc[i * C_stride + j] = (int32_t) a_ptr[0] * (int32_t) b_ptr[0] - + (int32_t) a_ptr[1] * (int32_t) b_ptr[1]; - }} - }} - break; - case 3: - for (int i = 0; i < {M}; i++) {{ - for (int j = 0; j < {N}; j++) {{ - int8_t *a_ptr = &aa[i * A_stride + k_base]; - int8_t *b_ptr = &bb[j * B_stride + k_base]; - cc[i * C_stride + j] = (int32_t) a_ptr[0] * (int32_t) b_ptr[0] - + (int32_t) a_ptr[1] * (int32_t) b_ptr[1] - + (int32_t) a_ptr[2] * (int32_t) b_ptr[2]; - }} - }} - break; - }} - return 0; -}} - -#ifdef __cplusplus -extern "C" -#endif -__STATIC_FORCEINLINE int32_t gemm_{M}x{K}x{N}_body_loop_{uniq_id}( - int8_t *aa, int8_t *bb, int32_t *cc, - int A_stride, int B_stride, int C_stride) {{ - for (int i = 0; i < {M}; i++) {{ - for (int j = 0; j < {N}; j++) {{ - int32_t sum = 0; - for (int l = 0; l < {K}; l++) {{ - sum += (int32_t) aa[i*A_stride + l] * (int32_t) bb[j*B_stride + l]; - }} - // NOTE: this is the line where `*_body` differs from `*_update`. 
here - // we're *setting* the result, instead of accumulating, because we know - // the `i` and `j` itervars span their entire respective axes. - cc[i*C_stride + j] = sum; - }} - }} - return 0; -}} - -#ifdef __cplusplus -extern "C" -#endif __STATIC_FORCEINLINE int32_t gemm_{M}x{K}x{N}_body_{uniq_id}( int8_t *aa, int8_t *bb, int32_t *cc, int A_stride, int B_stride, int C_stride) {{ + int16_t aa_pad[{aa_pad_size}]; int16_t bb_pad[{bb_pad_size}]; - if ( {M} < 16 || {N} < 16 ) - return gemm_{M}x{K}x{N}_body_loop_{uniq_id}(aa, bb, cc, A_stride, B_stride, C_stride); - - for (int i = 0; i < {N}; i++) - for (int j = 0; j < {K} / 4; j++) - read_and_pad(&bb[i*B_stride + j*4], (int32_t*) &bb_pad[i*{K} + j*4], (int32_t*) &bb_pad[i*{K} + j*4 + 2]); - for (int i = 0; i < {M}; i++) {{ - int16_t aa_pad_line[{K}]; - for (int l = 0; l < {K} / 4; l++) - read_and_pad(&aa[i*A_stride + l*4], (int32_t*) &aa_pad_line[l*4], (int32_t*) &aa_pad_line[l*4 + 2]); - - for (int j = 0; j < {N}; j++) {{ - int32_t *aa_ptr = (int32_t *) aa_pad_line; - int32_t *bb_ptr = (int32_t *) &bb_pad[j*{K}]; - int32_t sum = 0; - for (int l = 0; l < 2 * ({K} / 4); l++) {{ - sum = __SMLAD(*aa_ptr, *bb_ptr, sum); - ++ aa_ptr; ++ bb_ptr; - }} - // NOTE: this is the line where `*_body` differs from `*_update`. here - // we're *setting* the result, instead of accumulating, because we know - // the `i` and `j` itervars span their entire respective axes. 
- cc[i*C_stride + j] = sum; + for (int j = 0; j < {K} / 4; j++) {{ + read_and_pad(&aa[i*A_stride + j*4], (int32_t*) &aa_pad[i*{K} + j*4], (int32_t*) &aa_pad[i*{K} + j*4 + 2]); }} }} - if ( {K} % 4 != 0 ) - gemm_{M}x{N}_body_rest_{uniq_id}({K}, aa, bb, cc, A_stride, B_stride, C_stride); - - return 0; -}} - - -#ifdef __cplusplus -extern "C" -#endif -__STATIC_FORCEINLINE int32_t gemm_{M}x{N}_update_rest_{uniq_id}( - int K, - int8_t *aa, int8_t *bb, int32_t *cc, - int A_stride, int B_stride, int C_stride) {{ - int k_base = (K / 4) * 4; - switch ( K % 4 ) {{ - case 1: - for (int i = 0; i < {M}; i++) {{ - for (int j = 0; j < {N}; j++) {{ - int8_t *a_ptr = &aa[i * A_stride + k_base]; - int8_t *b_ptr = &bb[j * B_stride + k_base]; - cc[i * C_stride + j] += (int32_t) a_ptr[0] * (int32_t) b_ptr[0]; - }} - }} - break; - case 2: - for (int i = 0; i < {M}; i++) {{ - for (int j = 0; j < {N}; j++) {{ - int8_t *a_ptr = &aa[i * A_stride + k_base]; - int8_t *b_ptr = &bb[j * B_stride + k_base]; - cc[i * C_stride + j] += (int32_t) a_ptr[0] * (int32_t) b_ptr[0] - + (int32_t) a_ptr[1] * (int32_t) b_ptr[1]; - }} - }} - break; - case 3: - for (int i = 0; i < {M}; i++) {{ - for (int j = 0; j < {N}; j++) {{ - int8_t *a_ptr = &aa[i * A_stride + k_base]; - int8_t *b_ptr = &bb[j * B_stride + k_base]; - cc[i * C_stride + j] += (int32_t) a_ptr[0] * (int32_t) b_ptr[0] - + (int32_t) a_ptr[1] * (int32_t) b_ptr[1] - + (int32_t) a_ptr[2] * (int32_t) b_ptr[2]; - }} - }} - break; - }} - return 0; -}} - -#ifdef __cplusplus -extern "C" -#endif -__STATIC_FORCEINLINE int32_t gemm_{M}x{K}x{N}_update_loop_{uniq_id}( - int8_t *aa, int8_t *bb, int32_t *cc, - int A_stride, int B_stride, int C_stride) {{ - for (int i = 0; i < {M}; i++) {{ - for (int j = 0; j < {N}; j++) {{ - int32_t sum = 0; - for (int l = 0; l < {K}; l++) {{ - sum += (int32_t) aa[i*A_stride + l] * (int32_t) bb[j*B_stride + l]; - }} - cc[i*C_stride + j] += sum; - }} - }} - return 0; -}} - -#ifdef __cplusplus -extern "C" -#endif 
-__STATIC_FORCEINLINE int32_t gemm_{M}x{K}x{N}_update_{uniq_id}( - int8_t *aa, int8_t *bb, int32_t *cc, - int A_stride, int B_stride, int C_stride) {{ - int16_t bb_pad[{bb_pad_size}]; - - if ( {M} < 16 || {N} < 16 ) - return gemm_{M}x{K}x{N}_update_loop_{uniq_id}(aa, bb, cc, A_stride, B_stride, C_stride); - - for (int i = 0; i < {N}; i++) - for (int j = 0; j < {K} / 4; j++) + for (int i = 0; i < {N}; i++) {{ + for (int j = 0; j < {K} / 4; j++) {{ read_and_pad(&bb[i*B_stride + j*4], (int32_t*) &bb_pad[i*{K} + j*4], (int32_t*) &bb_pad[i*{K} + j*4 + 2]); - - for (int i = 0; i < {M}; i++) {{ - int16_t aa_pad_line[{K}]; - for (int l = 0; l < {K} / 4; l++) - read_and_pad(&aa[i*A_stride + l*4], (int32_t*) &aa_pad_line[l*4], (int32_t*) &aa_pad_line[l*4 + 2]); - - for (int j = 0; j < {N}; j++) {{ - int32_t *aa_ptr = (int32_t *) aa_pad_line; - int32_t *bb_ptr = (int32_t *) &bb_pad[j*{K}]; - int32_t sum = 0; - for (int l = 0; l < 2 * ({K} / 4); l++) {{ - sum = __SMLAD(*aa_ptr, *bb_ptr, sum); - ++ aa_ptr; ++ bb_ptr; - }} - cc[i*C_stride + j] += sum; - }} - }} - - if ( {K} % 4 != 0 ) - gemm_{M}x{N}_update_rest_{uniq_id}({K}, aa, bb, cc, A_stride, B_stride, C_stride); - - return 0; -}} - - - -#ifdef __cplusplus -extern "C" -#endif -__STATIC_FORCEINLINE int32_t gemm16_{M}x{N}_body_rest_{uniq_id}( - int K, - int16_t *aa, int16_t *bb, int32_t *cc, - int A_stride, int B_stride, int C_stride) {{ - int k_base = (K / 2) * 2; - for (int i = 0; i < {M}; i++) {{ - for (int j = 0; j < {N}; j++) {{ - int16_t *a_ptr = &aa[i * A_stride + k_base]; - int16_t *b_ptr = &bb[j * B_stride + k_base]; - cc[i * C_stride + j] = (int32_t) a_ptr[0] * (int32_t) b_ptr[0]; - }} - }} - return 0; -}} - -#ifdef __cplusplus -extern "C" -#endif -__STATIC_FORCEINLINE int32_t gemm16_{M}x{K}x{N}_body_loop_{uniq_id}( - int16_t *aa, int16_t *bb, int32_t *cc, - int A_stride, int B_stride, int C_stride) {{ - for (int i = 0; i < {M}; i++) {{ - for (int j = 0; j < {N}; j++) {{ - int32_t sum = 0; - for (int l = 0; l < {K}; 
l++) {{ - sum += (int32_t) aa[i*A_stride + l] * (int32_t) bb[j*B_stride + l]; - }} - // NOTE: this is the line where `*_body` differs from `*_update`. here - // we're *setting* the result, instead of accumulating, because we know - // the `i` and `j` itervars span their entire respective axes. - cc[i*C_stride + j] = sum; }} }} - return 0; -}} - -#ifdef __cplusplus -extern "C" -#endif -__STATIC_FORCEINLINE int32_t gemm16_{M}x{K}x{N}_body_{uniq_id}( - int16_t *aa, int16_t *bb, int32_t *cc, - int A_stride, int B_stride, int C_stride) {{ - if ( {M} < 2 || {N} < 2 ) - return gemm16_{M}x{K}x{N}_body_loop_{uniq_id}(aa, bb, cc, A_stride, B_stride, C_stride); for (int i = 0; i < {M}; i++) {{ for (int j = 0; j < {N}; j++) {{ - int32_t *aa_ptr = (int32_t *) &aa[i*A_stride]; - int32_t *bb_ptr = (int32_t *) &bb[j*B_stride]; - int32_t sum = 0; for (int l = 0; l < {K} / 2; l++) {{ - sum = __SMLAD(*aa_ptr, *bb_ptr, sum); - ++ aa_ptr; ++ bb_ptr; + sum = __SMLAD( + *((int32_t*) &aa_pad[i*{K} + l*2]), + *((int32_t*) &bb_pad[j*{K} + l*2]), + sum); }} // NOTE: this is the line where `*_body` differs from `*_update`. 
here // we're *setting* the result, instead of accumulating, because we know @@ -404,80 +166,46 @@ def gemm_MxKxN_impl(M, K, N, uniq_id): }} }} - if ( {K} % 2 != 0 ) - gemm16_{M}x{N}_body_rest_{uniq_id}({K}, aa, bb, cc, A_stride, B_stride, C_stride); - return 0; }} - #ifdef __cplusplus extern "C" #endif -__STATIC_FORCEINLINE int32_t gemm16_{M}x{N}_update_rest_{uniq_id}( - int K, - int16_t *aa, int16_t *bb, int32_t *cc, +__STATIC_FORCEINLINE int32_t gemm_{M}x{K}x{N}_update_{uniq_id}( + int8_t *aa, int8_t *bb, int32_t *cc, int A_stride, int B_stride, int C_stride) {{ - int k_base = (K / 2) * 2; + int16_t aa_pad[{aa_pad_size}]; + int16_t bb_pad[{bb_pad_size}]; + for (int i = 0; i < {M}; i++) {{ - for (int j = 0; j < {N}; j++) {{ - int16_t *a_ptr = &aa[i * A_stride + k_base]; - int16_t *b_ptr = &bb[j * B_stride + k_base]; - cc[i * C_stride + j] += (int32_t) a_ptr[0] * (int32_t) b_ptr[0]; + for (int j = 0; j < {K} / 4; j++) {{ + read_and_pad(&aa[i*A_stride + j*4], (int32_t*) &aa_pad[i*{K} + j*4], (int32_t*) &aa_pad[i*{K} + j*4 + 2]); }} }} - return 0; -}} -#ifdef __cplusplus -extern "C" -#endif -__STATIC_FORCEINLINE int32_t gemm16_{M}x{K}x{N}_update_loop_{uniq_id}( - int16_t *aa, int16_t *bb, int32_t *cc, - int A_stride, int B_stride, int C_stride) {{ - for (int i = 0; i < {M}; i++) {{ - for (int j = 0; j < {N}; j++) {{ - int32_t sum = 0; - for (int l = 0; l < {K}; l++) {{ - sum += (int32_t) aa[i*A_stride + l] * (int32_t) bb[j*B_stride + l]; - }} - cc[i*C_stride + j] += sum; + for (int i = 0; i < {N}; i++) {{ + for (int j = 0; j < {K} / 4; j++) {{ + read_and_pad(&bb[i*B_stride + j*4], (int32_t*) &bb_pad[i*{K} + j*4], (int32_t*) &bb_pad[i*{K} + j*4 + 2]); }} }} - return 0; -}} - -#ifdef __cplusplus -extern "C" -#endif -__STATIC_FORCEINLINE int32_t gemm16_{M}x{K}x{N}_update_{uniq_id}( - int16_t *aa, int16_t *bb, int32_t *cc, - int A_stride, int B_stride, int C_stride) {{ - if ( {M} < 2 || {N} < 2 ) - return gemm16_{M}x{K}x{N}_update_loop_{uniq_id}(aa, bb, cc, A_stride, 
B_stride, C_stride); for (int i = 0; i < {M}; i++) {{ for (int j = 0; j < {N}; j++) {{ - int32_t *aa_ptr = (int32_t *) &aa[i*A_stride]; - int32_t *bb_ptr = (int32_t *) &bb[j*B_stride]; - int32_t sum = 0; for (int l = 0; l < {K} / 2; l++) {{ - sum = __SMLAD(*aa_ptr, *bb_ptr, sum); - ++ aa_ptr; ++ bb_ptr; + sum = __SMLAD( + *((int32_t*) &aa_pad[i*{K} + l*2]), + *((int32_t*) &bb_pad[j*{K} + l*2]), + sum); }} cc[i*C_stride + j] += sum; }} }} - if ( {K} % 2 != 0 ) - gemm16_{M}x{N}_update_rest_{uniq_id}({K}, aa, bb, cc, A_stride, B_stride, C_stride); - return 0; }} - - #ifdef __cplusplus extern "C" #endif diff --git a/tests/micro/zephyr/test_utils.py b/tests/micro/zephyr/test_utils.py deleted file mode 100644 index 8e6aba36da80..000000000000 --- a/tests/micro/zephyr/test_utils.py +++ /dev/null @@ -1,114 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -import io -import logging -import pathlib -import tarfile - -import numpy as np - -import tvm.micro - - -def build_project(temp_dir, zephyr_board, west_cmd, mod, build_config, extra_files_tar=None): - template_project_dir = ( - pathlib.Path(__file__).parent - / ".." - / ".." - / ".." 
- / "apps" - / "microtvm" - / "zephyr" - / "template_project" - ).resolve() - project_dir = temp_dir / "project" - project = tvm.micro.generate_project( - str(template_project_dir), - mod, - project_dir, - { - "extra_files_tar": extra_files_tar, - "project_type": "aot_demo", - "west_cmd": west_cmd, - "verbose": bool(build_config.get("debug")), - "zephyr_board": zephyr_board, - }, - ) - project.build() - return project, project_dir - - -def create_header_file(tensor_name, npy_data, output_path, tar_file): - """ - This method generates a header file containing the data contained in the numpy array provided. - It is used to capture the tensor data (for both inputs and expected outputs). - """ - header_file = io.StringIO() - header_file.write("#include \n") - header_file.write("#include \n") - header_file.write("#include \n") - header_file.write(f"const size_t {tensor_name}_len = {npy_data.size};\n") - - if npy_data.dtype == "int8": - header_file.write(f"int8_t {tensor_name}[] =") - elif npy_data.dtype == "int32": - header_file.write(f"int32_t {tensor_name}[] = ") - elif npy_data.dtype == "uint8": - header_file.write(f"uint8_t {tensor_name}[] = ") - elif npy_data.dtype == "float32": - header_file.write(f"float {tensor_name}[] = ") - else: - raise ValueError("Data type not expected.") - - header_file.write("{") - for i in np.ndindex(npy_data.shape): - header_file.write(f"{npy_data[i]}, ") - header_file.write("};\n\n") - - header_file_bytes = bytes(header_file.getvalue(), "utf-8") - raw_path = pathlib.Path(output_path) / f"{tensor_name}.h" - ti = tarfile.TarInfo(name=str(raw_path)) - ti.size = len(header_file_bytes) - ti.mode = 0o644 - ti.type = tarfile.REGTYPE - tar_file.addfile(ti, io.BytesIO(header_file_bytes)) - - -def _read_line(fd, timeout_sec: int): - data = "" - new_line = False - while True: - if new_line: - break - new_data = fd.read(1, timeout_sec=timeout_sec) - logging.debug(f"read data: {new_data}") - for item in new_data: - new_c = chr(item) - data = data + 
new_c - if new_c == "\n": - new_line = True - break - return data - - -def get_message(fd, expr: str, timeout_sec: int): - while True: - data = _read_line(fd, timeout_sec) - logging.debug(f"new line: {data}") - if expr in data: - return data diff --git a/tests/micro/zephyr/test_zephyr_aot.py b/tests/micro/zephyr/test_zephyr_aot.py index 7cb865d53fbe..1499d1ef27eb 100644 --- a/tests/micro/zephyr/test_zephyr_aot.py +++ b/tests/micro/zephyr/test_zephyr_aot.py @@ -15,9 +15,11 @@ # specific language governing permissions and limitations # under the License. +import io import logging import os import sys +import logging import pathlib import tarfile import tempfile @@ -30,19 +32,106 @@ import tvm.micro from tvm.micro.project_api import server import tvm.testing -from tvm import relay +import tvm.relay as relay +from tvm.contrib import utils from tvm.contrib.download import download_testdata from tvm.micro.interface_api import generate_c_interface_header import conftest -from test_utils import create_header_file -from test_utils import build_project -from test_utils import get_message _LOG = logging.getLogger(__name__) +def _build_project(temp_dir, zephyr_board, west_cmd, mod, build_config, extra_files_tar=None): + template_project_dir = ( + pathlib.Path(__file__).parent + / ".." + / ".." + / ".." + / "apps" + / "microtvm" + / "zephyr" + / "template_project" + ).resolve() + project_dir = temp_dir / "project" + project = tvm.micro.generate_project( + str(template_project_dir), + mod, + project_dir, + { + "extra_files_tar": extra_files_tar, + "project_type": "aot_demo", + "west_cmd": west_cmd, + "verbose": bool(build_config.get("debug")), + "zephyr_board": zephyr_board, + }, + ) + project.build() + return project, project_dir + + +def _create_header_file(tensor_name, npy_data, output_path, tar_file): + """ + This method generates a header file containing the data contained in the numpy array provided. 
+ It is used to capture the tensor data (for both inputs and expected outputs). + """ + header_file = io.StringIO() + header_file.write("#include \n") + header_file.write("#include \n") + header_file.write("#include \n") + header_file.write(f"const size_t {tensor_name}_len = {npy_data.size};\n") + + if npy_data.dtype == "int8": + header_file.write(f"int8_t {tensor_name}[] =") + elif npy_data.dtype == "int32": + header_file.write(f"int32_t {tensor_name}[] = ") + elif npy_data.dtype == "uint8": + header_file.write(f"uint8_t {tensor_name}[] = ") + elif npy_data.dtype == "float32": + header_file.write(f"float {tensor_name}[] = ") + else: + raise ValueError("Data type not expected.") + + header_file.write("{") + for i in np.ndindex(npy_data.shape): + header_file.write(f"{npy_data[i]}, ") + header_file.write("};\n\n") + + header_file_bytes = bytes(header_file.getvalue(), "utf-8") + raw_path = pathlib.Path(output_path) / f"{tensor_name}.h" + ti = tarfile.TarInfo(name=str(raw_path)) + ti.size = len(header_file_bytes) + ti.mode = 0o644 + ti.type = tarfile.REGTYPE + tar_file.addfile(ti, io.BytesIO(header_file_bytes)) + + +def _read_line(fd, timeout_sec: int): + data = "" + new_line = False + while True: + if new_line: + break + new_data = fd.read(1, timeout_sec=timeout_sec) + logging.debug(f"read data: {new_data}") + for item in new_data: + new_c = chr(item) + data = data + new_c + if new_c == "\n": + new_line = True + break + return data + + +def _get_message(fd, expr: str, timeout_sec: int): + while True: + data = _read_line(fd, timeout_sec) + logging.debug(f"new line: {data}") + if expr in data: + return data + + @tvm.testing.requires_micro def test_tflite(temp_dir, board, west_cmd, tvm_debug): """Testing a TFLite model.""" @@ -103,12 +192,12 @@ def test_tflite(temp_dir, board, west_cmd, tvm_debug): ) tf.add(header_path, arcname=os.path.relpath(header_path, tar_temp_dir)) - create_header_file("input_data", sample, "include", tf) - create_header_file( + 
_create_header_file("input_data", sample, "include", tf) + _create_header_file( "output_data", np.zeros(shape=output_shape, dtype="float32"), "include", tf ) - project, _ = build_project( + project, _ = _build_project( temp_dir, board, west_cmd, @@ -120,9 +209,9 @@ def test_tflite(temp_dir, board, west_cmd, tvm_debug): project.flash() with project.transport() as transport: timeout_read = 60 - get_message(transport, "#wakeup", timeout_sec=timeout_read) + _get_message(transport, "#wakeup", timeout_sec=timeout_read) transport.write(b"start\n", timeout_sec=5) - result_line = get_message(transport, "#result", timeout_sec=timeout_read) + result_line = _get_message(transport, "#result", timeout_sec=timeout_read) result_line = result_line.strip("\n") result_line = result_line.split(":") @@ -164,10 +253,10 @@ def test_qemu_make_fail(temp_dir, board, west_cmd, tvm_debug): lowered.libmod_name, ["input_1"], ["output"], model_files_path ) tf.add(header_path, arcname=os.path.relpath(header_path, tar_temp_dir)) - create_header_file("input_data", np.zeros(shape=shape, dtype=dtype), "include", tf) - create_header_file("output_data", np.zeros(shape=shape, dtype=dtype), "include", tf) + _create_header_file("input_data", np.zeros(shape=shape, dtype=dtype), "include", tf) + _create_header_file("output_data", np.zeros(shape=shape, dtype=dtype), "include", tf) - project, project_dir = build_project( + project, project_dir = _build_project( temp_dir, board, west_cmd, diff --git a/tests/micro/zephyr/test_zephyr_armv7m.py b/tests/micro/zephyr/test_zephyr_armv7m.py deleted file mode 100644 index 7c25719941ed..000000000000 --- a/tests/micro/zephyr/test_zephyr_armv7m.py +++ /dev/null @@ -1,267 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -import logging -import os -import pathlib -import sys -import tarfile -import tempfile - -from urllib.request import urlopen, urlretrieve -from urllib.error import HTTPError -import json - -import pytest -import numpy as np - -import requests - -from test_utils import create_header_file -from test_utils import build_project -from test_utils import get_message - -import tvm -import tvm.rpc -import tvm.micro -import tvm.testing -from tvm import relay - -from tvm.contrib.download import download_testdata -from tvm.micro.interface_api import generate_c_interface_header - -import conftest - - -_LOG = logging.getLogger(__name__) -logging.basicConfig(level=logging.INFO) - - -def _open_tflite_model(): - # Import TFLite model - - model_url = "https://github.com/tlc-pack/web-data/raw/main/testdata/microTVM/model/mnist_model_quant.tflite" - model_path = download_testdata(model_url, "mnist_model_quant.tflite", module="model") - - tflite_model_buf = open(model_path, "rb").read() - - try: - import tflite - - tflite_model = tflite.Model.GetRootAsModel(tflite_model_buf, 0) - except AttributeError: - import tflite.Model - - tflite_model = tflite.Model.Model.GetRootAsModel(tflite_model_buf, 0) - - relay_mod, params = relay.frontend.from_tflite(tflite_model) - - return relay_mod, params - - -def _get_test_data(testdata_dir): - - from PIL import Image - - image_files = ["digit-2.jpg"] - - for file in image_files: 
- img = Image.open(testdata_dir / file).resize((28, 28)) - img = np.asarray(img).astype("uint8") - sample = np.reshape(img, -1) - - output_shape = (1, 10) - - return sample, output_shape - - -def _apply_desired_layout_simd(relay_mod): - - desired_layouts = {"qnn.conv2d": ["NHWC", "HWOI"], "nn.conv2d": ["NHWC", "HWOI"]} - - seq = tvm.transform.Sequential( - [relay.transform.RemoveUnusedFunctions(), relay.transform.ConvertLayout(desired_layouts)] - ) - - with tvm.transform.PassContext(opt_level=3): - return seq(relay_mod) - - -def _apply_desired_layout_no_simd(relay_mod): - - desired_layouts = {"qnn.conv2d": ["NHWC", "HWIO"], "nn.conv2d": ["NHWC", "HWIO"]} - - seq = tvm.transform.Sequential( - [relay.transform.RemoveUnusedFunctions(), relay.transform.ConvertLayout(desired_layouts)] - ) - - with tvm.transform.PassContext(opt_level=3): - return seq(relay_mod) - - -def _loadCMSIS(temp_dir): - REPO_PATH = "ARM-software/CMSIS_5" - BRANCH = "master" - API_PATH_URL = f"https://api.github.com/repos/{REPO_PATH}/git/trees" - RAW_PATH_URL = f"https://raw.githubusercontent.com/{REPO_PATH}/{BRANCH}" - - url = "https://api.github.com/repos/ARM-software/CMSIS_5/git/trees/master?recursive=1" - r = requests.get(url) - res = r.json() - - include_trees = {} - - for file in res["tree"]: - if file["path"] in {"CMSIS/DSP/Include", "CMSIS/DSP/Include/dsp", "CMSIS/NN/Include"}: - include_trees.update({file["path"]: file["sha"]}) - - for path, sha in include_trees.items(): - url = f"{API_PATH_URL}/{sha}" - content = json.load(urlopen(url)) - temp_path = f"{temp_dir}" - if path == "CMSIS/DSP/Include/dsp": - temp_path = f"{temp_dir}/dsp" - if not os.path.isdir(temp_path): - os.makedirs(temp_path) - for item in content["tree"]: - if item["type"] == "blob": - file_name = item["path"] - file_url = f"{RAW_PATH_URL}/{path}/{file_name}" - print(file_name, " ", file_url) - try: - urlretrieve(file_url, f"{temp_path}/{file_name}") - except HTTPError as e: - print(f"Failed to download {file_url}: {e}") 
- - -def _generate_project(temp_dir, board, west_cmd, lowered, build_config, sample, output_shape): - - with tempfile.NamedTemporaryFile() as tar_temp_file: - with tarfile.open(tar_temp_file.name, "w:gz") as tf: - with tempfile.TemporaryDirectory() as tar_temp_dir: - model_files_path = os.path.join(tar_temp_dir, "include") - os.mkdir(model_files_path) - _loadCMSIS(model_files_path) - tf.add(model_files_path, arcname=os.path.relpath(model_files_path, tar_temp_dir)) - header_path = generate_c_interface_header( - lowered.libmod_name, ["input_1"], ["output"], model_files_path - ) - tf.add(header_path, arcname=os.path.relpath(header_path, tar_temp_dir)) - - create_header_file("input_data", sample, "include", tf) - create_header_file( - "output_data", np.zeros(shape=output_shape, dtype="float32"), "include", tf - ) - - project, _ = build_project( - temp_dir, - board, - west_cmd, - lowered, - build_config, - extra_files_tar=tar_temp_file.name, - ) - - return project - - -def _run_model(temp_dir, board, west_cmd, lowered, build_config, sample, output_shape): - - project = _generate_project( - temp_dir, board, west_cmd, lowered, build_config, sample, output_shape - ) - - project.flash() - - with project.transport() as transport: - timeout_read = 60 - transport.write(b"start\n", timeout_sec=5) - result_line = get_message(transport, "#result", timeout_sec=timeout_read) - - result_line = result_line.strip("\n") - result_line = result_line.split(":") - result = int(result_line[1]) - time = int(result_line[2]) - logging.info(f"Result: {result}\ttime: {time} ms") - - return result, time - - -@tvm.testing.requires_micro -def test_armv7m_intrinsic(temp_dir, board, west_cmd, tvm_debug): - """Testing a ARM v7m SIMD extension.""" - - if board not in [ - "mps2_an521", - "stm32f746xx_disco", - "nucleo_f746zg", - "nucleo_l4r5zi", - ]: - pytest.skip(msg="Platform does not support ARM v7m SIMD extenion.") - - model = conftest.ZEPHYR_BOARDS[board] - - build_config = {"debug": tvm_debug} - - 
this_dir = pathlib.Path(os.path.dirname(__file__)) - testdata_dir = this_dir.parent / "testdata" / "mnist" - - relay_mod, params = _open_tflite_model() - - sample, output_shape = _get_test_data(testdata_dir) - - relay_mod_simd = _apply_desired_layout_simd(relay_mod) - # kernel layout "HWIO" is not supported by arm_cpu SIMD extension (see tvm\python\relay\op\strategy\arm_cpu.py) - relay_mod_no_simd = _apply_desired_layout_no_simd(relay_mod) - - target = tvm.target.target.micro( - model, - options=[ - "-keys=arm_cpu,cpu", - "-link-params=1", - "--executor=aot", - "--unpacked-api=1", - "--interface-api=c", - ], - ) - - temp_dir_simd = temp_dir / "simd" - temp_dir_no_simd = temp_dir / "nosimd" - - os.makedirs(temp_dir_simd, exist_ok=True) - os.makedirs(temp_dir_no_simd, exist_ok=True) - - with tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}): - lowered_simd = relay.build(relay_mod_simd, target, params=params) - lowered_no_simd = relay.build(relay_mod_no_simd, target, params=params) - result_simd, time_simd = _run_model( - temp_dir_simd, board, west_cmd, lowered_simd, build_config, sample, output_shape - ) - result_no_simd, time_no_simd = _run_model( - temp_dir_no_simd, board, west_cmd, lowered_no_simd, build_config, sample, output_shape - ) - - assert result_no_simd == result_simd == 2 - - if board not in [ - "mps2_an521", - ]: - assert time_no_simd > time_simd - - -if __name__ == "__main__": - sys.exit(pytest.main([__file__] + sys.argv[1:])) From e64aea9a560467c1a6c492847bb44bcc1fd66655 Mon Sep 17 00:00:00 2001 From: Sergey Smirnov Date: Tue, 28 Sep 2021 16:00:57 +0300 Subject: [PATCH 18/22] Revert "fix test_export_model_library_format_workspace" This reverts commit 32ede712ada81242f435693403a78d98adf9afeb. 
fix format move schedule_depthwise_conv2d_nhwc to generic conv2d, add test for schedule_depthwise_conv2d_nhwc fix test_export_model_library_format_workspace use x86 depthwise_conv2d_nhwc schedule for arm_cpu Add x86 schedule for depthwise_conv2d_nhwc # Conflicts: # python/tvm/relay/op/strategy/arm_cpu.py --- python/tvm/relay/op/strategy/arm_cpu.py | 5 ++-- python/tvm/topi/generic/conv2d.py | 30 ++++++++++++++++++- .../topi/python/test_topi_depthwise_conv2d.py | 6 +++- 3 files changed, 37 insertions(+), 4 deletions(-) diff --git a/python/tvm/relay/op/strategy/arm_cpu.py b/python/tvm/relay/op/strategy/arm_cpu.py index 80cce9a4b57e..2d331d0b57c6 100644 --- a/python/tvm/relay/op/strategy/arm_cpu.py +++ b/python/tvm/relay/op/strategy/arm_cpu.py @@ -21,6 +21,7 @@ from tvm import topi from ....target import arm_isa +from ....topi.generic import conv2d as conv2d_generic from .generic import * from .. import op as _op @@ -207,8 +208,8 @@ def conv2d_strategy_arm_cpu(attrs, inputs, out_type, target): else: strategy.add_implementation( wrap_compute_conv2d(topi.nn.depthwise_conv2d_nhwc), - wrap_topi_schedule(topi.x86.schedule_depthwise_conv2d_nhwc), - name="depthwise_conv2d_nhwc.x86", + wrap_topi_schedule(conv2d_generic.schedule_depthwise_conv2d_nhwc), + name="depthwise_conv2d_nhwc.generic", ) else: raise RuntimeError("Unsupported depthwise_conv2d layout {} for arm cpu".format(layout)) diff --git a/python/tvm/topi/generic/conv2d.py b/python/tvm/topi/generic/conv2d.py index 4daa84c29528..2e0acd13a942 100644 --- a/python/tvm/topi/generic/conv2d.py +++ b/python/tvm/topi/generic/conv2d.py @@ -20,7 +20,7 @@ from tvm import te from tvm import autotvm from tvm.autotvm.task.space import SplitEntity, OtherOptionEntity -from ..utils import get_const_tuple +from ..utils import get_const_tuple, traverse_inline def fallback_schedule_cpu_common_int8(cfg, wkl, int32_lanes, num_int8_elements): @@ -361,3 +361,31 @@ def schedule_conv_NCHWc_cpu_1x1_int8( raise ValueError("Unsupported output ndim: 
%s" % out_ndim) return s + + +def schedule_depthwise_conv2d_nhwc(outs): + """Create schedule for depthwise conv2d in NHWC layout. + Parameters + ---------- + outs : list[te.tensor.Tensor] + The output tensors. + Returns + ------- + s : tvm.te.schedule.Schedule + The computation schedule for depthwise conv2d. + """ + outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs + s = te.create_schedule([x.op for x in outs]) + + def _callback(op): + """Traverse operators from computation graph""" + if "depthwise_conv2d_nhwc" in op.tag: + out = outs[0] + depthwise_conv2d_out = op.output(0) + data_pad = depthwise_conv2d_out.op.input_tensors[0] + s[data_pad].compute_inline() + s[depthwise_conv2d_out].compute_at(s[out], s[out].op.axis[3]) + s[out].fuse(*s[out].op.axis) + + traverse_inline(s, outs[0].op, _callback) + return s diff --git a/tests/python/topi/python/test_topi_depthwise_conv2d.py b/tests/python/topi/python/test_topi_depthwise_conv2d.py index 27601cd32b89..cca8f124b36b 100644 --- a/tests/python/topi/python/test_topi_depthwise_conv2d.py +++ b/tests/python/topi/python/test_topi_depthwise_conv2d.py @@ -30,6 +30,7 @@ from tvm.contrib.pickle_memoize import memoize from tvm.topi.nn.depthwise_conv2d import _get_workload from tvm.topi.x86.depthwise_conv2d import _fallback_schedule +from tvm.topi.generic import conv2d as conv2d_generic _depthwise_conv2d_implement = { @@ -53,7 +54,10 @@ ], }, "NHWC": { - "generic": [(topi.nn.depthwise_conv2d_nhwc, topi.generic.schedule_depthwise_conv2d_nhwc)], + "generic": [ + (topi.nn.depthwise_conv2d_nhwc, topi.generic.schedule_depthwise_conv2d_nhwc), + (topi.nn.depthwise_conv2d_nhwc, conv2d_generic.schedule_depthwise_conv2d_nhwc) + ], "arm_cpu": [ ( topi.arm_cpu.compute_depthwise_conv2d_nhwc, From 3c19f9c1dc8a89bc41b76120f019350f4acb77b8 Mon Sep 17 00:00:00 2001 From: Sergey Smirnov Date: Wed, 29 Sep 2021 10:51:03 +0300 Subject: [PATCH 19/22] move schedule_depthwise_conv2d_nhwc to generic conv2d, add test for 
schedule_depthwise_conv2d_nhwc fix format Revert "fix test_export_model_library_format_workspace" added a missing comma --- python/tvm/relay/op/strategy/x86.py | 29 ++-------------- python/tvm/topi/x86/depthwise_conv2d.py | 34 ++----------------- .../topi/python/test_topi_depthwise_conv2d.py | 4 +-- .../test_micro_model_library_format.py | 4 +-- 4 files changed, 7 insertions(+), 64 deletions(-) diff --git a/python/tvm/relay/op/strategy/x86.py b/python/tvm/relay/op/strategy/x86.py index 8dacf262b9c4..a6e141f2753b 100644 --- a/python/tvm/relay/op/strategy/x86.py +++ b/python/tvm/relay/op/strategy/x86.py @@ -214,8 +214,8 @@ def conv2d_strategy_cpu(attrs, inputs, out_type, target): ) strategy.add_implementation( wrap_compute_conv2d(topi.nn.depthwise_conv2d_nhwc), - wrap_topi_schedule(topi.x86.schedule_depthwise_conv2d_nhwc), - name="depthwise_conv2d_nhwc.x86", + wrap_topi_schedule(topi.generic.schedule_depthwise_conv2d_nhwc), + name="depthwise_conv2d_nhwc.generic", ) else: raise RuntimeError("Unsupported depthwise_conv2d layout {}".format(layout)) @@ -565,31 +565,6 @@ def sparse_dense_strategy_cpu(attrs, inputs, out_type, target): return strategy -@sparse_conv2d_strategy.register("cpu") -def sparse_conv2d_strategy_cpu(attrs, inputs, out_type, target): - """sparse conv2d x86 strategy""" - strategy = _op.OpStrategy() - if attrs["kernel_size"][0] == 1: - strategy.add_implementation( - wrap_compute_sparse_conv2d(topi.nn.sparse_conv2d), - wrap_topi_schedule(topi.generic.schedule_sparse_conv2d), - name="sparse_conv2d.generic", - ) - elif attrs["kernel_size"][0] == 3: - if attrs["layout"] == "NHWC": - strategy.add_implementation( - wrap_compute_sparse_conv2d(topi.x86.spconv2d_3x3_nhwc), - wrap_topi_schedule(topi.x86.schedule_spconv2d_3x3_nhwc), - name="conv3x3_spNHWC.x86", - ) - elif attrs["layout"] == "NCHW": - strategy.add_implementation( - wrap_compute_sparse_conv2d(topi.x86.spconv2d_3x3_nchw), - wrap_topi_schedule(topi.x86.schedule_spconv2d_3x3_nchw), - ) - return 
strategy - - @roi_align_strategy.register("cpu") def roi_align_strategy_cpu(attrs, inputs, out_type, target): """roi_align x86 strategy""" diff --git a/python/tvm/topi/x86/depthwise_conv2d.py b/python/tvm/topi/x86/depthwise_conv2d.py index 46f70ffd575b..a0225ef9e147 100644 --- a/python/tvm/topi/x86/depthwise_conv2d.py +++ b/python/tvm/topi/x86/depthwise_conv2d.py @@ -27,7 +27,7 @@ from ..nn.depthwise_conv2d import _get_workload, depthwise_conv2d_infer_layout from ..nn.conv2d import unpack_NCHWc_to_nchw from ..utils import traverse_inline -from .utils import get_simd_32bit_lanes +from .utils import get_fp32_len def _fallback_schedule(cfg, wkl): @@ -40,7 +40,7 @@ def _fallback_schedule(cfg, wkl): wkl : topi.nn.depthwise_conv2d.Workload Convolution workload """ - simd_width = get_simd_32bit_lanes() + simd_width = get_fp32_len() pt, pl, pb, pr = wkl.padt, wkl.padl, wkl.padb, wkl.padr HSTR, WSTR = wkl.stride_h, wkl.stride_w @@ -305,36 +305,6 @@ def _schedule_depthwise_conv2d_NCHWc_impl(s, cfg, data_vec, kernel_vec, conv_out return s -def schedule_depthwise_conv2d_nhwc(outs): - """Create schedule for depthwise conv2d in NHWC layout. - - Parameters - ---------- - outs : list[te.tensor.Tensor] - The output tensors. - - Returns - ------- - s : tvm.te.schedule.Schedule - The computation schedule for depthwise conv2d. 
- """ - outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs - s = te.create_schedule([x.op for x in outs]) - - def _callback(op): - """Traverse operators from computation graph""" - if "depthwise_conv2d_nhwc" in op.tag: - out = outs[0] - depthwise_conv2d_out = op.output(0) - data_pad = depthwise_conv2d_out.op.input_tensors[0] - s[data_pad].compute_inline() - s[depthwise_conv2d_out].compute_at(s[out], s[out].op.axis[3]) - s[out].fuse(*s[out].op.axis) - - traverse_inline(s, outs[0].op, _callback) - return s - - @depthwise_conv2d_infer_layout.register("cpu") def _depthwise_conv2d_infer_layout(workload, cfg): _, data, kernel, strides, padding, dilation, _, _, dtype = workload diff --git a/tests/python/topi/python/test_topi_depthwise_conv2d.py b/tests/python/topi/python/test_topi_depthwise_conv2d.py index cca8f124b36b..2c7631fcfaa0 100644 --- a/tests/python/topi/python/test_topi_depthwise_conv2d.py +++ b/tests/python/topi/python/test_topi_depthwise_conv2d.py @@ -56,7 +56,7 @@ "NHWC": { "generic": [ (topi.nn.depthwise_conv2d_nhwc, topi.generic.schedule_depthwise_conv2d_nhwc), - (topi.nn.depthwise_conv2d_nhwc, conv2d_generic.schedule_depthwise_conv2d_nhwc) + (topi.nn.depthwise_conv2d_nhwc, conv2d_generic.schedule_depthwise_conv2d_nhwc), ], "arm_cpu": [ ( @@ -65,8 +65,6 @@ ) ], "gpu": [(topi.nn.depthwise_conv2d_nhwc, topi.cuda.schedule_depthwise_conv2d_nhwc)], - "mali": [(topi.mali.depthwise_conv2d_nhwc, topi.mali.schedule_depthwise_conv2d_nhwc)], - "bifrost": [(topi.mali.depthwise_conv2d_nhwc, topi.mali.schedule_depthwise_conv2d_nhwc)], }, "NCHWc": { "generic": [(topi.x86.depthwise_conv2d_NCHWc, topi.x86.schedule_depthwise_conv2d_NCHWc)], diff --git a/tests/python/unittest/test_micro_model_library_format.py b/tests/python/unittest/test_micro_model_library_format.py index 69e8a35bf1b8..92c1174e728c 100644 --- a/tests/python/unittest/test_micro_model_library_format.py +++ b/tests/python/unittest/test_micro_model_library_format.py @@ -318,11 +318,11 @@ def 
@main(%p0: Tensor[(1, 56, 56, 128), int16], %p1: Tensor[(3, 3, 128, 1), int1 "constants_size_bytes": 0, "device": 1, "io_size_bytes": 1207040, - "workspace_size_bytes": 16, + "workspace_size_bytes": 2466816, } ] assert metadata["memory"]["functions"]["operator_functions"][0]["workspace"] == [ - {"device": 1, "workspace_size_bytes": 16} + {"device": 1, "workspace_size_bytes": 2466816} ] assert ( "fused_nn_conv2d_add_fixed_point_multiply_clip_cast" From 456a3651f595372c35ed47947ac42a2c67bb6dc6 Mon Sep 17 00:00:00 2001 From: Alex-grovety Date: Wed, 29 Sep 2021 12:00:02 +0300 Subject: [PATCH 20/22] Revert wrong merge changes --- python/tvm/relay/op/strategy/x86.py | 25 +++++++++++++++++++ python/tvm/topi/x86/depthwise_conv2d.py | 4 +-- .../topi/python/test_topi_depthwise_conv2d.py | 2 ++ 3 files changed, 29 insertions(+), 2 deletions(-) diff --git a/python/tvm/relay/op/strategy/x86.py b/python/tvm/relay/op/strategy/x86.py index a6e141f2753b..1c8d1b478cb1 100644 --- a/python/tvm/relay/op/strategy/x86.py +++ b/python/tvm/relay/op/strategy/x86.py @@ -565,6 +565,31 @@ def sparse_dense_strategy_cpu(attrs, inputs, out_type, target): return strategy +@sparse_conv2d_strategy.register("cpu") +def sparse_conv2d_strategy_cpu(attrs, inputs, out_type, target): + """sparse conv2d x86 strategy""" + strategy = _op.OpStrategy() + if attrs["kernel_size"][0] == 1: + strategy.add_implementation( + wrap_compute_sparse_conv2d(topi.nn.sparse_conv2d), + wrap_topi_schedule(topi.generic.schedule_sparse_conv2d), + name="sparse_conv2d.generic", + ) + elif attrs["kernel_size"][0] == 3: + if attrs["layout"] == "NHWC": + strategy.add_implementation( + wrap_compute_sparse_conv2d(topi.x86.spconv2d_3x3_nhwc), + wrap_topi_schedule(topi.x86.schedule_spconv2d_3x3_nhwc), + name="conv3x3_spNHWC.x86", + ) + elif attrs["layout"] == "NCHW": + strategy.add_implementation( + wrap_compute_sparse_conv2d(topi.x86.spconv2d_3x3_nchw), + wrap_topi_schedule(topi.x86.schedule_spconv2d_3x3_nchw), + ) + return strategy + 
+ @roi_align_strategy.register("cpu") def roi_align_strategy_cpu(attrs, inputs, out_type, target): """roi_align x86 strategy""" diff --git a/python/tvm/topi/x86/depthwise_conv2d.py b/python/tvm/topi/x86/depthwise_conv2d.py index a0225ef9e147..5e49c2cb3b78 100644 --- a/python/tvm/topi/x86/depthwise_conv2d.py +++ b/python/tvm/topi/x86/depthwise_conv2d.py @@ -27,7 +27,7 @@ from ..nn.depthwise_conv2d import _get_workload, depthwise_conv2d_infer_layout from ..nn.conv2d import unpack_NCHWc_to_nchw from ..utils import traverse_inline -from .utils import get_fp32_len +from .utils import get_simd_32bit_lanes def _fallback_schedule(cfg, wkl): @@ -40,7 +40,7 @@ def _fallback_schedule(cfg, wkl): wkl : topi.nn.depthwise_conv2d.Workload Convolution workload """ - simd_width = get_fp32_len() + simd_width = get_simd_32bit_lanes() pt, pl, pb, pr = wkl.padt, wkl.padl, wkl.padb, wkl.padr HSTR, WSTR = wkl.stride_h, wkl.stride_w diff --git a/tests/python/topi/python/test_topi_depthwise_conv2d.py b/tests/python/topi/python/test_topi_depthwise_conv2d.py index 2c7631fcfaa0..24c232129c91 100644 --- a/tests/python/topi/python/test_topi_depthwise_conv2d.py +++ b/tests/python/topi/python/test_topi_depthwise_conv2d.py @@ -65,6 +65,8 @@ ) ], "gpu": [(topi.nn.depthwise_conv2d_nhwc, topi.cuda.schedule_depthwise_conv2d_nhwc)], + "mali": [(topi.mali.depthwise_conv2d_nhwc, topi.mali.schedule_depthwise_conv2d_nhwc)], + "bifrost": [(topi.mali.depthwise_conv2d_nhwc, topi.mali.schedule_depthwise_conv2d_nhwc)], }, "NCHWc": { "generic": [(topi.x86.depthwise_conv2d_NCHWc, topi.x86.schedule_depthwise_conv2d_NCHWc)], From 918f0c8cd1dcc709a4db9a299174150aba4e92e6 Mon Sep 17 00:00:00 2001 From: Sergey Smirnov Date: Thu, 30 Sep 2021 10:03:19 +0300 Subject: [PATCH 21/22] empty commit to force pipeline restart From bb79260600f671c241a4655f0972155fad56db5d Mon Sep 17 00:00:00 2001 From: Alex-grovety Date: Thu, 30 Sep 2021 17:33:56 +0300 Subject: [PATCH 22/22] Add condition to use compute_at for generic 
schedule_depthwise_conv2d_nhwc --- python/tvm/topi/generic/conv2d.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/tvm/topi/generic/conv2d.py b/python/tvm/topi/generic/conv2d.py index 2e0acd13a942..3772fdbafe6c 100644 --- a/python/tvm/topi/generic/conv2d.py +++ b/python/tvm/topi/generic/conv2d.py @@ -384,7 +384,8 @@ def _callback(op): depthwise_conv2d_out = op.output(0) data_pad = depthwise_conv2d_out.op.input_tensors[0] s[data_pad].compute_inline() - s[depthwise_conv2d_out].compute_at(s[out], s[out].op.axis[3]) + if depthwise_conv2d_out != out: + s[depthwise_conv2d_out].compute_at(s[out], s[out].op.axis[3]) s[out].fuse(*s[out].op.axis) traverse_inline(s, outs[0].op, _callback)