From 1212e153aaac2bb825c9e26055ff5982debe2228 Mon Sep 17 00:00:00 2001 From: Shriram Shastry Date: Fri, 26 Nov 2021 08:14:42 -0800 Subject: [PATCH] math: Add square root function using lookup table Add a fixed-point square root function. It takes a non-negative number y as input and returns the non-negative number x that, multiplied by itself (squared), gives y. Signed-off-by: Shriram Shastry --- src/include/sof/math/sqrt.h | 15 ++ src/math/CMakeLists.txt | 4 + src/math/Kconfig | 9 +- src/math/sqrt_int16.c | 147 ++++++++++++++++ .../cmocka/src/math/arithmetic/CMakeLists.txt | 5 + test/cmocka/src/math/arithmetic/square_root.c | 162 ++++++++++++++++++ 6 files changed, 341 insertions(+), 1 deletion(-) create mode 100644 src/include/sof/math/sqrt.h create mode 100644 src/math/sqrt_int16.c create mode 100644 test/cmocka/src/math/arithmetic/square_root.c diff --git a/src/include/sof/math/sqrt.h b/src/include/sof/math/sqrt.h new file mode 100644 index 000000000000..1cdb82afe9a0 --- /dev/null +++ b/src/include/sof/math/sqrt.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * + * Copyright(c) 2021 Intel Corporation. All rights reserved. + * + * Author: Shriram Shastry + * + */ + +#ifndef __SOF_MATH__SQRTLOOKUP__H +#define __SOF_MATH__SQRTLOOKUP__H + +#include + +uint16_t sqrt_int16(uint16_t u); +#endif diff --git a/src/math/CMakeLists.txt b/src/math/CMakeLists.txt index 8ae165639a99..943f6dfb8d9f 100644 --- a/src/math/CMakeLists.txt +++ b/src/math/CMakeLists.txt @@ -10,6 +10,10 @@ if(CONFIG_CORDIC_FIXED) add_local_sources(sof trig.c) endif() +if(CONFIG_SQRT_FIXED) + add_local_sources(sof sqrt_int16.c) +endif() + if(CONFIG_MATH_DECIBELS) add_local_sources(sof decibels.c) endif() diff --git a/src/math/Kconfig b/src/math/Kconfig index 8fcb454affd4..f21c80bd7ae6 100644 --- a/src/math/Kconfig +++ b/src/math/Kconfig @@ -20,7 +20,6 @@ config POWER_FIXED with base having values from -32 to + 32 . Exponent values range from -3 to +3. Power out MIN/MAX range is -/+32768. 
- config BINARY_LOGARITHM_FIXED bool "Binary Logarithm function" default n @@ -31,6 +30,14 @@ config BINARY_LOGARITHM_FIXED with a short lookup table. (log2n) operates for a range of 32 bit width size i.e. 1 to 4294967295. +config SQRT_FIXED + bool "Square Root functions" + default n + help + Select this to enable the sqrt_int16() function, a 16 bit version + of the square root. It takes a non-negative number y as input + and returns the non-negative number x that, multiplied by itself (squared), gives y. + config NUMBERS_GCD bool "Greatest common divisor" default n diff --git a/src/math/sqrt_int16.c b/src/math/sqrt_int16.c new file mode 100644 index 000000000000..0d62e8145187 --- /dev/null +++ b/src/math/sqrt_int16.c @@ -0,0 +1,147 @@ +// SPDX-License-Identifier: BSD-3-Clause +// +// Copyright(c) 2021 Intel Corporation. All rights reserved. +// +// Author: Shriram Shastry +// +// + +#include + +#define SQRT_WRAP_SCHAR_BITS 0xFF + +/* + * Square root + * + * y = sqrt_int16(u) computes the square root of + * u using lookup tables. 
+ * Range of u is [0 to 65535] + * Range of y is [0 to 4] + * +------------------+-----------------+--------+--------+ + * | u | y (returntype) | u | y | + * +----+-----+-------+----+----+-------+--------+--------+ + * |WLen| FLen|Signbit|WLen|FLen|Signbit| Qformat| Qformat| + * +----+-----+-------+----+----+-------+--------+--------+ + * | 16 | 12 | 0 | 16 | 12 | 1 | 4.12 | 4.12 | + * +------------------+-----------------+--------+--------+ + + * Arguments : uint16_t u + * Return Type : uint16_t + */ +uint16_t sqrt_int16(uint16_t u) +{ + static const int32_t iv1[193] = { + 46341, 46702, 47059, 47415, 47767, 48117, 48465, 48809, 49152, 49492, 49830, 50166, + 50499, 50830, 51159, 51486, 51811, 52134, 52454, 52773, 53090, 53405, 53719, 54030, + 54340, 54647, 54954, 55258, 55561, 55862, 56162, 56459, 56756, 57051, 57344, 57636, + 57926, 58215, 58503, 58789, 59073, 59357, 59639, 59919, 60199, 60477, 60753, 61029, + 61303, 61576, 61848, 62119, 62388, 62657, 62924, 63190, 63455, 63719, 63982, 64243, + 64504, 64763, 65022, 65279, 65536, 65792, 66046, 66300, 66552, 66804, 67054, 67304, + 67553, 67801, 68048, 68294, 68539, 68784, 69027, 69270, 69511, 69752, 69992, 70232, + 70470, 70708, 70945, 71181, 71416, 71651, 71885, 72118, 72350, 72581, 72812, 73042, + 73271, 73500, 73728, 73955, 74182, 74408, 74633, 74857, 75081, 75304, 75527, 75748, + 75969, 76190, 76410, 76629, 76848, 77066, 77283, 77500, 77716, 77932, 78147, 78361, + 78575, 78788, 79001, 79213, 79424, 79635, 79846, 80056, 80265, 80474, 80682, 80890, + 81097, 81303, 81509, 81715, 81920, 82125, 82329, 82532, 82735, 82938, 83140, 83341, + 83542, 83743, 83943, 84143, 84342, 84540, 84739, 84936, 85134, 85331, 85527, 85723, + 85918, 86113, 86308, 86502, 86696, 86889, 87082, 87275, 87467, 87658, 87849, 88040, + 88231, 88420, 88610, 88799, 88988, 89176, 89364, 89552, 89739, 89926, 90112, 90298, + 90484, 90669, 90854, 91038, 91222, 91406, 91589, 91772, 91955, 92137, 92319, 92501, + 92682}; + static const int8_t iv[256] = { + 
8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + int32_t xfi_tmp; + int32_t y; + uint16_t v; + int32_t a_i; + int32_t l1_i; + int32_t l2_i; + int num_left_shifts; + int shift_factor; + int xfi; + unsigned int slice_temp; + int sign = 1; + + if (!u) + return 0; + /* Normalize the input such that u = x * 2^n and 0.5 <= x < 2 + * Normalize to the range [1, 2) + * normalizes the input U + * such that the output X is + * U = X*2^N + * 1 <= X < 2 + * The output X is unsigned with one integer bit. + * The input U must be scalar and positive. + * The number of bits in a byte is assumed to be B=8. + * Reinterpret the input as an unsigned integer. + * Unroll the loop in generated code so there will be no branching. + * For each iteration, see how many leading zeros are in the high + * byte of V, and shift them out to the left. Continue with the + * shifted V for as many bytes as it has. + * The index is the high byte of the input plus 1 to make it a + * one-based index. + * Index into the number-of-leading-zeros lookup table. This lookup + * table takes in a byte and returns the number of leading zeros in the + * binary representation. + */ + + shift_factor = iv[u >> 8]; + /* Left-shift out all the leading zeros in the high byte. 
*/ + v = u << shift_factor; + /* Update the total number of left-shifts */ + num_left_shifts = shift_factor; + /* For each iteration, see how many leading zeros are in the high + * byte of V, and shift them out to the left. Continue with the + * shifted V for as many bytes as it has. + * The index is the high byte of the input plus 1 to make it a + * one-based index. + * Index into the number-of-leading-zeros lookup table. This lookup + * table takes in a byte and returns the number of leading zeros in the + * binary representation. + */ + shift_factor = iv[v >> 8]; + /* Left-shift out all the leading zeros in the high byte. + * Update the total number of left-shifts + */ + num_left_shifts += shift_factor; + /* The input has been left-shifted so the most-significant-bit is a 1. + * Reinterpret the output as unsigned with one integer bit, so + * that 1 <= x < 2. + * Let Q = int(u). Then u = Q*2^(-u_fraction_length), + * and x = Q*2^num_left_shifts * 2^(1-word_length). Therefore, + * u = x*2^n, where n is defined as: + */ + xfi_tmp = (3 - num_left_shifts) & 1; + v = (v << shift_factor) >> xfi_tmp; + /* Extract the high byte of x */ + /* Convert the high byte into an index for SQRTLUT */ + slice_temp = SQRT_WRAP_SCHAR_BITS & (v >> 8); + /* The upper byte was used for the index into SQRTLUT. + * The remainder, r, interpreted as a fraction, is used to + * linearly interpolate between points. + */ + a_i = iv1[((v >> 8) - 63) - 1]; + a_i <<= 8; + l1_i = iv1[SQRT_WRAP_SCHAR_BITS & ((slice_temp + 194) - 1)]; + l2_i = iv1[(slice_temp - 63) - 1]; + y = a_i + (v & SQRT_WRAP_SCHAR_BITS) * (l1_i - l2_i); + xfi = (((xfi_tmp - num_left_shifts) + 3) >> 1); + shift_factor = (((xfi_tmp - num_left_shifts) + 3) >> 1); + if (xfi != 0) { + if (xfi > 0) + y <<= (shift_factor >= 32) ? 
0 : shift_factor; + else + y >>= -sign * xfi; + } + y = ((y >> 11) + 1) >> 1; + + return y; +} diff --git a/test/cmocka/src/math/arithmetic/CMakeLists.txt b/test/cmocka/src/math/arithmetic/CMakeLists.txt index bf22cefb1e22..5657f0b87487 100644 --- a/test/cmocka/src/math/arithmetic/CMakeLists.txt +++ b/test/cmocka/src/math/arithmetic/CMakeLists.txt @@ -9,3 +9,8 @@ cmocka_test(base2_logarithm base2_logarithm.c ${PROJECT_SOURCE_DIR}/src/math/base2log.c ) + +cmocka_test(square_root + square_root.c + ${PROJECT_SOURCE_DIR}/src/math/sqrt_int16.c +) diff --git a/test/cmocka/src/math/arithmetic/square_root.c b/test/cmocka/src/math/arithmetic/square_root.c new file mode 100644 index 000000000000..fd0abebdc6e3 --- /dev/null +++ b/test/cmocka/src/math/arithmetic/square_root.c @@ -0,0 +1,162 @@ +// SPDX-License-Identifier: BSD-3-Clause +// +// Copyright(c) 2021 Intel Corporation. All rights reserved. +// +// Author: Shriram Shastry + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +/* 'Error[max] = 0.0003000860000000,THD(-dBc) = -87.1210823527511309' */ +#define CMP_TOLERANCE 0.0001689942 + +static const double sqrt_ref_table[] = { + 0.0000000000000000, 0.2529127196287289, 0.3580137261684250, 0.4383362543470480, + 0.5060667106469264, 0.5657458434447398, 0.6199010757774179, 0.6695089151758922, + 0.7156864056624241, 0.7590598625931949, 0.8002380017063674, 0.8392530626247365, + 0.8765332548597343, 0.9124250825410271, 0.9468285879714448, 0.9800251112854200, + 1.0121334212938529, 1.0433710015497843, 1.0735864616438677, 1.1029744939820685, + 1.1317074351394887, 1.1596234572049671, 1.1868830634270588, 1.2135304899342250, + 1.2397036881347898, 1.2652391387105444, 1.2902693214499832, 1.3148230929007141, + 1.3390178303517843, 1.3626935069009465, 1.3859648038460428, 1.4089384024417106, + 1.4314580907679415, 1.4536289448738284, 1.4754666899408471, 1.4970674458754356, + 1.5182805344369004, 1.5392012945030940, 
1.5598414883410430, 1.5802893574042065, + 1.6003997303408295, 1.6202605162827983, 1.6399552204252408, 1.6593426315110451, + 1.6785061252494731, 1.6974532854396907, 1.7162624043615824, 1.7347972458979175, + 1.7531361407845656, 1.7712851751976586, 1.7893183496097054, 1.8071040368501201, + 1.8247163735084968, 1.8422265952170487, 1.8595062978852748, 1.8766268983537990, + 1.8935927121149891, 1.9104717594746068, 1.9271396388170734, 1.9436645881555799, + 1.9601125007572908, 1.9763617734046062, 1.9924785326635266, 2.0084659685628234, + 2.0243874459327196, 2.0401248429936829, 2.0557417684986605, 2.0712409474756917, + 2.0866835042418388, 2.1019545405705138, 2.1171154277400652, 2.1322257663648099, + 2.1471729232877355, 2.1620167451363552, 2.1767593459084997, 2.1914584719713490, + 2.2060043241401410, 2.2204548907543695, 2.2348120201987909, 2.2491317769308226, + 2.2633070038662453, 2.2773940013752560, 2.2914476694602910, 2.3053627188850347, + 2.3191942802134968, 2.3329438383992445, 2.3466648541067809, 2.3602543890966499, + 2.3737661268541177, 2.3872013883939496, 2.4006123079591588, 2.4138981537908761, + 2.4271112748749282, 2.4403028756693299, 2.4533737931163282, 2.4663754402969551, + 2.4793089069839604, 2.4922242356226696, 2.5050242482608827, 2.5177591878742098, + 2.5304782774210888, 2.5430857547967194, 2.5556310375326090, 2.5681150370943278, + 2.5805859466650203, 2.5929498012639969, 2.6052549809231724, 2.6175023131556161, + 2.6297390257875399, 2.6418728560436060, 2.6539512111660981, 2.6660206330081166, + 2.6779900782816579, 2.6899062628881700, 2.7017698915479462, 2.7136266381449752, + 2.7253870138018930, 2.7370968595849874, 2.7487568212739375, 2.7604117531402812, + 2.7719736453698474, 2.7834875128828940, 2.7949976241045360, 2.8064170328908711, + 2.8177901636300033, 2.8291175744390689, 2.8404427884354582, 2.8516802201728368, + 2.8628735427669523, 2.8740232715872360, 2.8851722218959477, 2.8962361080806240, + 2.9072578897476569, 2.9182798738083706, 2.9292187124939990, 
2.9401168530136688, + 2.9509747462702896, 2.9618340496219568, 2.9726126187665289, 2.9833522462156559, + 2.9940941216626773, 3.0047569707257522, 3.0153821145710538, 3.0259699503836783, + 3.0365610688738007, 3.0470753139280951, 3.0575534030495688, 3.0679957066870220, + 3.0784422425351754, 3.0888139284157279, 3.0991509043809078, 3.1094927741514371, + 3.1197612338526808, 3.1299960063872287, 3.1401974211424988, 3.1504045499149789, + 3.1605400917999757, 3.1706432337342845, 3.1807142844611178, 3.1907918051402224, + 3.2007994606816590, 3.2107759235502562, 3.2207593849316032, 3.2306742112715421, + 3.2405587023112234, 3.2504131347991749, 3.2602752232365293, 3.2700702400713046, + 3.2798360048560355, 3.2895727781126838, 3.2993178153786578, 3.3089972636170311, + 3.3186484800856810, 3.3283083869662677, 3.3379037677111065, 3.3474716438306089, + 3.3570122504989461, 3.3665620793882591, 3.3760487375221642, 3.3855088128485207, + 3.3949784839156196, 3.4043859578490805, 3.4137675072784321, 3.4231233453528955, + 3.4324892457042018, 3.4417941928048226, 3.4510740515635128, 3.4603290238249023, + 3.4695944917958350, 3.4788001927748020, 3.4879815975718680, 3.4971738031409019, + 3.5063070962374359, 3.5154166604934614, 3.5245026799003858, 3.5335998818485379, + 3.5426392659640071, 3.5516556438511886, 3.5606491902811768, 3.5696542746637245, + 3.5786025882144274, 3.5875285822032135, 3.5964663647113397, 3.6053481324623839, + 3.6142080737002402, 3.6230463485511746, 3.6318967259718442, 3.6406920594682268, + 3.6494661959833250, 3.6582526566654741, 3.6669847755001657, 3.6756961500510350, + 3.6843869274616097, 3.6930903069956198, 3.7017402474207994, 3.7103700224000571, + 3.7189797723132347, 3.7276023837381045, 3.7361724230822109, 3.7447228493908598, + 3.7532863204297375, 3.7617978476886553, 3.7702901600042669, 3.7787633869263368, + 3.7872498886065071, 3.7956852559847478, 3.8041019184887777, 3.8125000000000000, + 3.8209115711273665, 3.8292727871131094, 3.8376157861196840, 3.8459724266107265, + 
3.8542792776341468, 3.8625682639598748, 3.8708395003538962, 3.8791245690071618, + 3.8873605782876637, 3.8955791750874478, 3.9037804693815712, 3.9119957742180653, + 3.9201627238228260, 3.9283126944020128, 3.9364768015796816, 3.9445930655930783, + 3.9526926641056979, 3.9607756993580185, 3.9688730295891301, 3.9769231786332004, + 3.9849570653270532, 3.9930053589840071, 3.9999694823054588, 3.9999694823054588}; +/* testvector in Q4.12 */ +static const uint32_t uv[252] = { + 0U, 262U, 525U, 787U, 1049U, 1311U, + 1574U, 1836U, 2098U, 2360U, 2623U, 2885U, + 3147U, 3410U, 3672U, 3934U, 4196U, 4459U, + 4721U, 4983U, 5246U, 5508U, 5770U, 6032U, + 6295U, 6557U, 6819U, 7081U, 7344U, 7606U, + 7868U, 8131U, 8393U, 8655U, 8917U, 9180U, + 9442U, 9704U, 9966U, 10229U, 10491U, 10753U, + 11016U, 11278U, 11540U, 11802U, 12065U, 12327U, + 12589U, 12851U, 13114U, 13376U, 13638U, 13901U, + 14163U, 14425U, 14687U, 14950U, 15212U, 15474U, + 15737U, 15999U, 16261U, 16523U, 16786U, 17048U, + 17310U, 17572U, 17835U, 18097U, 18359U, 18622U, + 18884U, 19146U, 19408U, 19671U, 19933U, 20195U, + 20457U, 20720U, 20982U, 21244U, 21507U, 21769U, + 22031U, 22293U, 22556U, 22818U, 23080U, 23342U, + 23605U, 23867U, 24129U, 24392U, 24654U, 24916U, + 25178U, 25441U, 25703U, 25965U, 26228U, 26490U, + 26752U, 27014U, 27277U, 27539U, 27801U, 28063U, + 28326U, 28588U, 28850U, 29113U, 29375U, 29637U, + 29899U, 30162U, 30424U, 30686U, 30948U, 31211U, + 31473U, 31735U, 31998U, 32260U, 32522U, 32784U, + 33047U, 33309U, 33571U, 33833U, 34096U, 34358U, + 34620U, 34883U, 35145U, 35407U, 35669U, 35932U, + 36194U, 36456U, 36719U, 36981U, 37243U, 37505U, + 37768U, 38030U, 38292U, 38554U, 38817U, 39079U, + 39341U, 39604U, 39866U, 40128U, 40390U, 40653U, + 40915U, 41177U, 41439U, 41702U, 41964U, 42226U, + 42489U, 42751U, 43013U, 43275U, 43538U, 43800U, + 44062U, 44324U, 44587U, 44849U, 45111U, 45374U, + 45636U, 45898U, 46160U, 46423U, 46685U, 46947U, + 47210U, 47472U, 47734U, 47996U, 48259U, 48521U, + 48783U, 49045U, 
49308U, 49570U, 49832U, 50095U, + 50357U, 50619U, 50881U, 51144U, 51406U, 51668U, + 51930U, 52193U, 52455U, 52717U, 52980U, 53242U, + 53504U, 53766U, 54029U, 54291U, 54553U, 54816U, + 55078U, 55340U, 55602U, 55865U, 56127U, 56389U, + 56651U, 56914U, 57176U, 57438U, 57701U, 57963U, + 58225U, 58487U, 58750U, 59012U, 59274U, 59536U, + 59799U, 60061U, 60323U, 60586U, 60848U, 61110U, + 61372U, 61635U, 61897U, 62159U, 62421U, 62684U, + 62946U, 63208U, 63471U, 63733U, 63995U, 64257U, + 64520U, 64782U, 65044U, 65307U, UINT16_MAX, UINT16_MAX}; +static void test_math_arithmetic_sqrt_fixed(void **state) +{ + (void)state; + + uint32_t u[252]; + int i; + double y; + double diff; + + memcpy_s((void *)&u[0], sizeof(u), (void *)&uv[0], 252U * sizeof(uint32_t)); + for (i = 0; i < ARRAY_SIZE(sqrt_ref_table); i++) { + y = Q_CONVERT_QTOF(sqrt_int16(u[i]), 12); + diff = fabs(sqrt_ref_table[i] - y); + + if (diff > CMP_TOLERANCE) { + printf("%s: diff for %.16f: reftbl = %.16f, sqrt = %.16f\n", __func__, + diff, (double)sqrt_ref_table[i], y); + assert_true(diff <= CMP_TOLERANCE); + } + } +} + +int main(void) +{ + const struct CMUnitTest tests[] = { + cmocka_unit_test(test_math_arithmetic_sqrt_fixed) + }; + + cmocka_set_message_output(CM_OUTPUT_TAP); + + return cmocka_run_group_tests(tests, NULL, NULL); +}