Skip to content

Commit 1212e15

Browse files
math : Add square root function using lookup table
fix point math square function having positive number y as input and return the positive number x multiplied by itself (squared) Signed-off-by: Shriram Shastry <malladi.sastry@intel.com>
1 parent 3e1c160 commit 1212e15

File tree

6 files changed

+341
-1
lines changed

6 files changed

+341
-1
lines changed

src/include/sof/math/sqrt.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
/* SPDX-License-Identifier: BSD-3-Clause
2+
*
3+
* Copyright(c) 2021 Intel Corporation. All rights reserved.
4+
*
5+
* Author: Shriram Shastry <malladi.sastry@linux.intel.com>
6+
*
7+
*/
8+
9+
#ifndef __SOF_MATH__SQRTLOOKUP__H
10+
#define __SOF_MATH__SQRTLOOKUP__H
11+
12+
#include <stdint.h>
13+
14+
uint16_t sqrt_int16(uint16_t u);
15+
#endif

src/math/CMakeLists.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,10 @@ if(CONFIG_CORDIC_FIXED)
1010
add_local_sources(sof trig.c)
1111
endif()
1212

13+
if(CONFIG_SQRT_FIXED)
14+
add_local_sources(sof sqrt_int16.c)
15+
endif()
16+
1317
if(CONFIG_MATH_DECIBELS)
1418
add_local_sources(sof decibels.c)
1519
endif()

src/math/Kconfig

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@ config POWER_FIXED
2020
with base having values from -32 to + 32 . Exponent values range from
2121
-3 to +3. Power out MIN/MAX range is -/+32768.
2222

23-
2423
config BINARY_LOGARITHM_FIXED
2524
bool "Binary Logarithm function"
2625
default n
@@ -31,6 +30,14 @@ config BINARY_LOGARITHM_FIXED
3130
with a short lookup table. (log2n) operates for a range of 32 bit width size
3231
i.e. 1 to 4294967295.
3332

33+
config SQRT_FIXED
34+
bool "Square Root functions"
35+
default n
36+
help
37+
Select this to enable sqrt_int() functions as 16 bit version
38+
to calculate square root.square function having positive number
39+
y as input and return the positive number x multiplied by itself (squared)
40+
3441
config NUMBERS_GCD
3542
bool "Greatest common divisor"
3643
default n

src/math/sqrt_int16.c

Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
// SPDX-License-Identifier: BSD-3-Clause
2+
//
3+
// Copyright(c) 2021 Intel Corporation. All rights reserved.
4+
//
5+
// Author: Shriram Shastry <malladi.sastry@linux.intel.com>
6+
//
7+
//
8+
9+
#include <sof/math/sqrt.h>
10+
11+
#define SQRT_WRAP_SCHAR_BITS 0xFF
12+
13+
/*
14+
* Square root
15+
*
16+
* Y = SQRTLOOKUP_INT16(U) computes the square root of
17+
* U using lookup tables.
18+
* Range of u is [0 to 65535]
19+
* Range of y is [0 to 4]
20+
* +------------------+-----------------+--------+--------+
21+
* | u | y (returntype) | u | y |
22+
* +----+-----+-------+----+----+-------+--------+--------+
23+
* |WLen| FLen|Signbit|WLen|FLen|Signbit| Qformat| Qformat|
24+
* +----+-----+-------+----+----+-------+--------+--------+
25+
* | 16 | 12 | 0 | 16 | 12 | 1 | 4.12 | 4.12 |
26+
* +------------------+-----------------+--------+--------+
27+
28+
* Arguments : uint16_t u
29+
* Return Type : int32_t
30+
*/
31+
uint16_t sqrt_int16(uint16_t u)
32+
{
33+
static const int32_t iv1[193] = {
34+
46341, 46702, 47059, 47415, 47767, 48117, 48465, 48809, 49152, 49492, 49830, 50166,
35+
50499, 50830, 51159, 51486, 51811, 52134, 52454, 52773, 53090, 53405, 53719, 54030,
36+
54340, 54647, 54954, 55258, 55561, 55862, 56162, 56459, 56756, 57051, 57344, 57636,
37+
57926, 58215, 58503, 58789, 59073, 59357, 59639, 59919, 60199, 60477, 60753, 61029,
38+
61303, 61576, 61848, 62119, 62388, 62657, 62924, 63190, 63455, 63719, 63982, 64243,
39+
64504, 64763, 65022, 65279, 65536, 65792, 66046, 66300, 66552, 66804, 67054, 67304,
40+
67553, 67801, 68048, 68294, 68539, 68784, 69027, 69270, 69511, 69752, 69992, 70232,
41+
70470, 70708, 70945, 71181, 71416, 71651, 71885, 72118, 72350, 72581, 72812, 73042,
42+
73271, 73500, 73728, 73955, 74182, 74408, 74633, 74857, 75081, 75304, 75527, 75748,
43+
75969, 76190, 76410, 76629, 76848, 77066, 77283, 77500, 77716, 77932, 78147, 78361,
44+
78575, 78788, 79001, 79213, 79424, 79635, 79846, 80056, 80265, 80474, 80682, 80890,
45+
81097, 81303, 81509, 81715, 81920, 82125, 82329, 82532, 82735, 82938, 83140, 83341,
46+
83542, 83743, 83943, 84143, 84342, 84540, 84739, 84936, 85134, 85331, 85527, 85723,
47+
85918, 86113, 86308, 86502, 86696, 86889, 87082, 87275, 87467, 87658, 87849, 88040,
48+
88231, 88420, 88610, 88799, 88988, 89176, 89364, 89552, 89739, 89926, 90112, 90298,
49+
90484, 90669, 90854, 91038, 91222, 91406, 91589, 91772, 91955, 92137, 92319, 92501,
50+
92682};
51+
static const int8_t iv[256] = {
52+
8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
53+
3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
54+
2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
55+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
56+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
57+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
58+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
59+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
60+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
61+
int32_t xfi_tmp;
62+
int32_t y;
63+
uint16_t v;
64+
int32_t a_i;
65+
int32_t l1_i;
66+
int32_t l2_i;
67+
int num_left_shifts;
68+
int shift_factor;
69+
int xfi;
70+
unsigned int slice_temp;
71+
int sign = 1;
72+
73+
if (!u)
74+
return 0;
75+
/* Normalize the input such that u = x * 2^n and 0.5 <= x < 2
76+
* Normalize to the range [1, 2)
77+
* normalizes the input U
78+
* such that the output X is
79+
* U = X*2^N
80+
* 1 <= X < 2
81+
* The output X is unsigned with one integer bit.
82+
* The input U must be scalar and positive.
83+
* The number of bits in a byte is assumed to be B=8.
84+
* Reinterpret the input as an unsigned integer.
85+
* Unroll the loop in generated code so there will be no branching.
86+
* For each iteration, see how many leading zeros are in the high
87+
* byte of V, and shift them out to the left. Continue with the
88+
* shifted V for as many bytes as it has.
89+
* The index is the high byte of the input plus 1 to make it a
90+
* one-based index.
91+
* Index into the number-of-leading-zeros lookup table. This lookup
92+
* table takes in a byte and returns the number of leading zeros in the
93+
* binary representation.
94+
*/
95+
96+
shift_factor = iv[u >> 8];
97+
/* Left-shift out all the leading zeros in the high byte. */
98+
v = u << shift_factor;
99+
/* Update the total number of left-shifts */
100+
num_left_shifts = shift_factor;
101+
/* For each iteration, see how many leading zeros are in the high
102+
* byte of V, and shift them out to the left. Continue with the
103+
* shifted V for as many bytes as it has.
104+
* The index is the high byte of the input plus 1 to make it a
105+
* one-based index.
106+
* Index into the number-of-leading-zeros lookup table. This lookup
107+
* table takes in a byte and returns the number of leading zeros in the
108+
* binary representation.
109+
*/
110+
shift_factor = iv[v >> 8];
111+
/* Left-shift out all the leading zeros in the high byte.
112+
* Update the total number of left-shifts
113+
*/
114+
num_left_shifts += shift_factor;
115+
/* The input has been left-shifted so the most-significant-bit is a 1.
116+
* Reinterpret the output as unsigned with one integer bit, so
117+
* that 1 <= x < 2.
118+
* Let Q = int(u). Then u = Q*2^(-u_fraction_length),
119+
* and x = Q*2^num_left_shifts * 2^(1-word_length). Therefore,
120+
* u = x*2^n, where n is defined as:
121+
*/
122+
xfi_tmp = (3 - num_left_shifts) & 1;
123+
v = (v << shift_factor) >> xfi_tmp;
124+
/* Extract the high byte of x */
125+
/* Convert the high byte into an index for SQRTLUT */
126+
slice_temp = SQRT_WRAP_SCHAR_BITS & (v >> 8);
127+
/* The upper byte was used for the index into SQRTLUT.
128+
* The remainder, r, interpreted as a fraction, is used to
129+
* linearly interpolate between points.
130+
*/
131+
a_i = iv1[((v >> 8) - 63) - 1];
132+
a_i <<= 8;
133+
l1_i = iv1[SQRT_WRAP_SCHAR_BITS & ((slice_temp + 194) - 1)];
134+
l2_i = iv1[(slice_temp - 63) - 1];
135+
y = a_i + (v & SQRT_WRAP_SCHAR_BITS) * (l1_i - l2_i);
136+
xfi = (((xfi_tmp - num_left_shifts) + 3) >> 1);
137+
shift_factor = (((xfi_tmp - num_left_shifts) + 3) >> 1);
138+
if (xfi != 0) {
139+
if (xfi > 0)
140+
y <<= (shift_factor >= 32) ? 0 : shift_factor;
141+
else
142+
y >>= -sign * xfi;
143+
}
144+
y = ((y >> 11) + 1) >> 1;
145+
146+
return y;
147+
}

test/cmocka/src/math/arithmetic/CMakeLists.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,3 +9,8 @@ cmocka_test(base2_logarithm
99
base2_logarithm.c
1010
${PROJECT_SOURCE_DIR}/src/math/base2log.c
1111
)
12+
13+
cmocka_test(square_root
14+
square_root.c
15+
${PROJECT_SOURCE_DIR}/src/math/sqrt_int16.c
16+
)
Lines changed: 162 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,162 @@
1+
// SPDX-License-Identifier: BSD-3-Clause
2+
//
3+
// Copyright(c) 2021 Intel Corporation. All rights reserved.
4+
//
5+
// Author: Shriram Shastry <malladi.sastry@linux.intel.com>
6+
7+
#include <stdio.h>
8+
#include <stdint.h>
9+
#include <stdarg.h>
10+
#include <stddef.h>
11+
#include <setjmp.h>
12+
#include <math.h>
13+
#include <cmocka.h>
14+
15+
#include <sof/math/sqrt.h>
16+
#include <sof/audio/format.h>
17+
#include <sof/string.h>
18+
#include <sof/common.h>
19+
20+
/* 'Error[max] = 0.0003000860000000,THD(-dBc) = -87.1210823527511309' */
21+
#define CMP_TOLERANCE 0.0001689942
22+
23+
static const double sqrt_ref_table[] = {
24+
0.0000000000000000, 0.2529127196287289, 0.3580137261684250, 0.4383362543470480,
25+
0.5060667106469264, 0.5657458434447398, 0.6199010757774179, 0.6695089151758922,
26+
0.7156864056624241, 0.7590598625931949, 0.8002380017063674, 0.8392530626247365,
27+
0.8765332548597343, 0.9124250825410271, 0.9468285879714448, 0.9800251112854200,
28+
1.0121334212938529, 1.0433710015497843, 1.0735864616438677, 1.1029744939820685,
29+
1.1317074351394887, 1.1596234572049671, 1.1868830634270588, 1.2135304899342250,
30+
1.2397036881347898, 1.2652391387105444, 1.2902693214499832, 1.3148230929007141,
31+
1.3390178303517843, 1.3626935069009465, 1.3859648038460428, 1.4089384024417106,
32+
1.4314580907679415, 1.4536289448738284, 1.4754666899408471, 1.4970674458754356,
33+
1.5182805344369004, 1.5392012945030940, 1.5598414883410430, 1.5802893574042065,
34+
1.6003997303408295, 1.6202605162827983, 1.6399552204252408, 1.6593426315110451,
35+
1.6785061252494731, 1.6974532854396907, 1.7162624043615824, 1.7347972458979175,
36+
1.7531361407845656, 1.7712851751976586, 1.7893183496097054, 1.8071040368501201,
37+
1.8247163735084968, 1.8422265952170487, 1.8595062978852748, 1.8766268983537990,
38+
1.8935927121149891, 1.9104717594746068, 1.9271396388170734, 1.9436645881555799,
39+
1.9601125007572908, 1.9763617734046062, 1.9924785326635266, 2.0084659685628234,
40+
2.0243874459327196, 2.0401248429936829, 2.0557417684986605, 2.0712409474756917,
41+
2.0866835042418388, 2.1019545405705138, 2.1171154277400652, 2.1322257663648099,
42+
2.1471729232877355, 2.1620167451363552, 2.1767593459084997, 2.1914584719713490,
43+
2.2060043241401410, 2.2204548907543695, 2.2348120201987909, 2.2491317769308226,
44+
2.2633070038662453, 2.2773940013752560, 2.2914476694602910, 2.3053627188850347,
45+
2.3191942802134968, 2.3329438383992445, 2.3466648541067809, 2.3602543890966499,
46+
2.3737661268541177, 2.3872013883939496, 2.4006123079591588, 2.4138981537908761,
47+
2.4271112748749282, 2.4403028756693299, 2.4533737931163282, 2.4663754402969551,
48+
2.4793089069839604, 2.4922242356226696, 2.5050242482608827, 2.5177591878742098,
49+
2.5304782774210888, 2.5430857547967194, 2.5556310375326090, 2.5681150370943278,
50+
2.5805859466650203, 2.5929498012639969, 2.6052549809231724, 2.6175023131556161,
51+
2.6297390257875399, 2.6418728560436060, 2.6539512111660981, 2.6660206330081166,
52+
2.6779900782816579, 2.6899062628881700, 2.7017698915479462, 2.7136266381449752,
53+
2.7253870138018930, 2.7370968595849874, 2.7487568212739375, 2.7604117531402812,
54+
2.7719736453698474, 2.7834875128828940, 2.7949976241045360, 2.8064170328908711,
55+
2.8177901636300033, 2.8291175744390689, 2.8404427884354582, 2.8516802201728368,
56+
2.8628735427669523, 2.8740232715872360, 2.8851722218959477, 2.8962361080806240,
57+
2.9072578897476569, 2.9182798738083706, 2.9292187124939990, 2.9401168530136688,
58+
2.9509747462702896, 2.9618340496219568, 2.9726126187665289, 2.9833522462156559,
59+
2.9940941216626773, 3.0047569707257522, 3.0153821145710538, 3.0259699503836783,
60+
3.0365610688738007, 3.0470753139280951, 3.0575534030495688, 3.0679957066870220,
61+
3.0784422425351754, 3.0888139284157279, 3.0991509043809078, 3.1094927741514371,
62+
3.1197612338526808, 3.1299960063872287, 3.1401974211424988, 3.1504045499149789,
63+
3.1605400917999757, 3.1706432337342845, 3.1807142844611178, 3.1907918051402224,
64+
3.2007994606816590, 3.2107759235502562, 3.2207593849316032, 3.2306742112715421,
65+
3.2405587023112234, 3.2504131347991749, 3.2602752232365293, 3.2700702400713046,
66+
3.2798360048560355, 3.2895727781126838, 3.2993178153786578, 3.3089972636170311,
67+
3.3186484800856810, 3.3283083869662677, 3.3379037677111065, 3.3474716438306089,
68+
3.3570122504989461, 3.3665620793882591, 3.3760487375221642, 3.3855088128485207,
69+
3.3949784839156196, 3.4043859578490805, 3.4137675072784321, 3.4231233453528955,
70+
3.4324892457042018, 3.4417941928048226, 3.4510740515635128, 3.4603290238249023,
71+
3.4695944917958350, 3.4788001927748020, 3.4879815975718680, 3.4971738031409019,
72+
3.5063070962374359, 3.5154166604934614, 3.5245026799003858, 3.5335998818485379,
73+
3.5426392659640071, 3.5516556438511886, 3.5606491902811768, 3.5696542746637245,
74+
3.5786025882144274, 3.5875285822032135, 3.5964663647113397, 3.6053481324623839,
75+
3.6142080737002402, 3.6230463485511746, 3.6318967259718442, 3.6406920594682268,
76+
3.6494661959833250, 3.6582526566654741, 3.6669847755001657, 3.6756961500510350,
77+
3.6843869274616097, 3.6930903069956198, 3.7017402474207994, 3.7103700224000571,
78+
3.7189797723132347, 3.7276023837381045, 3.7361724230822109, 3.7447228493908598,
79+
3.7532863204297375, 3.7617978476886553, 3.7702901600042669, 3.7787633869263368,
80+
3.7872498886065071, 3.7956852559847478, 3.8041019184887777, 3.8125000000000000,
81+
3.8209115711273665, 3.8292727871131094, 3.8376157861196840, 3.8459724266107265,
82+
3.8542792776341468, 3.8625682639598748, 3.8708395003538962, 3.8791245690071618,
83+
3.8873605782876637, 3.8955791750874478, 3.9037804693815712, 3.9119957742180653,
84+
3.9201627238228260, 3.9283126944020128, 3.9364768015796816, 3.9445930655930783,
85+
3.9526926641056979, 3.9607756993580185, 3.9688730295891301, 3.9769231786332004,
86+
3.9849570653270532, 3.9930053589840071, 3.9999694823054588, 3.9999694823054588};
87+
/* testvector in Q4.12 */
88+
static const uint32_t uv[252] = {
89+
0U, 262U, 525U, 787U, 1049U, 1311U,
90+
1574U, 1836U, 2098U, 2360U, 2623U, 2885U,
91+
3147U, 3410U, 3672U, 3934U, 4196U, 4459U,
92+
4721U, 4983U, 5246U, 5508U, 5770U, 6032U,
93+
6295U, 6557U, 6819U, 7081U, 7344U, 7606U,
94+
7868U, 8131U, 8393U, 8655U, 8917U, 9180U,
95+
9442U, 9704U, 9966U, 10229U, 10491U, 10753U,
96+
11016U, 11278U, 11540U, 11802U, 12065U, 12327U,
97+
12589U, 12851U, 13114U, 13376U, 13638U, 13901U,
98+
14163U, 14425U, 14687U, 14950U, 15212U, 15474U,
99+
15737U, 15999U, 16261U, 16523U, 16786U, 17048U,
100+
17310U, 17572U, 17835U, 18097U, 18359U, 18622U,
101+
18884U, 19146U, 19408U, 19671U, 19933U, 20195U,
102+
20457U, 20720U, 20982U, 21244U, 21507U, 21769U,
103+
22031U, 22293U, 22556U, 22818U, 23080U, 23342U,
104+
23605U, 23867U, 24129U, 24392U, 24654U, 24916U,
105+
25178U, 25441U, 25703U, 25965U, 26228U, 26490U,
106+
26752U, 27014U, 27277U, 27539U, 27801U, 28063U,
107+
28326U, 28588U, 28850U, 29113U, 29375U, 29637U,
108+
29899U, 30162U, 30424U, 30686U, 30948U, 31211U,
109+
31473U, 31735U, 31998U, 32260U, 32522U, 32784U,
110+
33047U, 33309U, 33571U, 33833U, 34096U, 34358U,
111+
34620U, 34883U, 35145U, 35407U, 35669U, 35932U,
112+
36194U, 36456U, 36719U, 36981U, 37243U, 37505U,
113+
37768U, 38030U, 38292U, 38554U, 38817U, 39079U,
114+
39341U, 39604U, 39866U, 40128U, 40390U, 40653U,
115+
40915U, 41177U, 41439U, 41702U, 41964U, 42226U,
116+
42489U, 42751U, 43013U, 43275U, 43538U, 43800U,
117+
44062U, 44324U, 44587U, 44849U, 45111U, 45374U,
118+
45636U, 45898U, 46160U, 46423U, 46685U, 46947U,
119+
47210U, 47472U, 47734U, 47996U, 48259U, 48521U,
120+
48783U, 49045U, 49308U, 49570U, 49832U, 50095U,
121+
50357U, 50619U, 50881U, 51144U, 51406U, 51668U,
122+
51930U, 52193U, 52455U, 52717U, 52980U, 53242U,
123+
53504U, 53766U, 54029U, 54291U, 54553U, 54816U,
124+
55078U, 55340U, 55602U, 55865U, 56127U, 56389U,
125+
56651U, 56914U, 57176U, 57438U, 57701U, 57963U,
126+
58225U, 58487U, 58750U, 59012U, 59274U, 59536U,
127+
59799U, 60061U, 60323U, 60586U, 60848U, 61110U,
128+
61372U, 61635U, 61897U, 62159U, 62421U, 62684U,
129+
62946U, 63208U, 63471U, 63733U, 63995U, 64257U,
130+
64520U, 64782U, 65044U, 65307U, UINT16_MAX, UINT16_MAX};
131+
static void test_math_arithmetic_sqrt_fixed(void **state)
132+
{
133+
(void)state;
134+
135+
uint32_t u[252];
136+
int i;
137+
double y;
138+
double diff;
139+
140+
memcpy_s((void *)&u[0], sizeof(u), (void *)&uv[0], 252U * sizeof(uint32_t));
141+
for (i = 0; i < ARRAY_SIZE(sqrt_ref_table); i++) {
142+
y = Q_CONVERT_QTOF(sqrt_int16(u[i]), 12);
143+
diff = fabs(sqrt_ref_table[i] - y);
144+
145+
if (diff > CMP_TOLERANCE) {
146+
printf("%s: diff for %.16f: reftbl = %.16f, sqrt = %.16f\n", __func__,
147+
diff, (double)sqrt_ref_table[i], y);
148+
assert_true(diff <= CMP_TOLERANCE);
149+
}
150+
}
151+
}
152+
153+
int main(void)
154+
{
155+
const struct CMUnitTest tests[] = {
156+
cmocka_unit_test(test_math_arithmetic_sqrt_fixed)
157+
};
158+
159+
cmocka_set_message_output(CM_OUTPUT_TAP);
160+
161+
return cmocka_run_group_tests(tests, NULL, NULL);
162+
}

0 commit comments

Comments
 (0)