thesofproject · lgirdwood · Dec 21, 2021 · Nov 26, 2021
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright(c) 2021 Intel Corporation. All rights reserved.
+ *
+ * Author: Shriram Shastry <malladi.sastry@linux.intel.com>
+ *
+ */
+
+#ifndef __SOF_MATH__SQRTLOOKUP__H
+#define __SOF_MATH__SQRTLOOKUP__H
+
+#include <stdint.h>
+
+uint16_t sqrt_int16(uint16_t u);
+#endif
@@ -10,6 +10,10 @@ if(CONFIG_CORDIC_FIXED)
         add_local_sources(sof trig.c)
 endif()
 
+if(CONFIG_SQRT_FIXED)
+        add_local_sources(sof sqrt_int16.c)
+endif()
+
 if(CONFIG_MATH_DECIBELS)
         add_local_sources(sof decibels.c)
 endif()

@@ -20,7 +20,6 @@ config POWER_FIXED
 	  with base having values from -32 to + 32 . Exponent values range from
 	  -3 to +3. Power out MIN/MAX range is -/+32768.
 
-
 config BINARY_LOGARITHM_FIXED
        bool "Binary Logarithm function"
        default n
@@ -31,6 +30,14 @@ config BINARY_LOGARITHM_FIXED
 	  with a short lookup table. (log2n) operates for a range of 32 bit width size
 	  i.e. 1 to 4294967295.
 
+config SQRT_FIXED
+	bool "Square Root functions"
+	default n
+	help
+	  Select this to enable sqrt_int() functions as 16 bit version
+	  to  calculate square root.square function having positive number
+	  y as input and return the positive number x multiplied by itself (squared)
+
 config NUMBERS_GCD
 	bool "Greatest common divisor"
 	default n

@@ -0,0 +1,147 @@
+// SPDX-License-Identifier: BSD-3-Clause
+//
+// Copyright(c) 2021 Intel Corporation. All rights reserved.
+//
+// Author: Shriram Shastry <malladi.sastry@linux.intel.com>
+//
+//
+
+#include <sof/math/sqrt.h>
+
+#define SQRT_WRAP_SCHAR_BITS 0xFF
+
+/*
+ *  Square root
+ *
+ *  Y = SQRTLOOKUP_INT16(U) computes the square root of
+ *  U using lookup tables.
+ *  Range of u is [0 to 65535]
+ *  Range of y is [0 to 4]
+ * +------------------+-----------------+--------+--------+
+ * | u		      | y (returntype)	|   u	 |  y	  |
+ * +----+-----+-------+----+----+-------+--------+--------+
+ * |WLen| FLen|Signbit|WLen|FLen|Signbit| Qformat| Qformat|
+ * +----+-----+-------+----+----+-------+--------+--------+
+ * | 16 | 12  |	 0    | 16 | 12 |   1	| 4.12	 | 4.12	  |
+ * +------------------+-----------------+--------+--------+
+
+ * Arguments	: uint16_t u
+ * Return Type	: int32_t
+ */
+uint16_t sqrt_int16(uint16_t u)
+{
+	static const int32_t iv1[193] = {
+	    46341, 46702, 47059, 47415, 47767, 48117, 48465, 48809, 49152, 49492, 49830, 50166,
+	    50499, 50830, 51159, 51486, 51811, 52134, 52454, 52773, 53090, 53405, 53719, 54030,
+	    54340, 54647, 54954, 55258, 55561, 55862, 56162, 56459, 56756, 57051, 57344, 57636,
+	    57926, 58215, 58503, 58789, 59073, 59357, 59639, 59919, 60199, 60477, 60753, 61029,
+	    61303, 61576, 61848, 62119, 62388, 62657, 62924, 63190, 63455, 63719, 63982, 64243,
+	    64504, 64763, 65022, 65279, 65536, 65792, 66046, 66300, 66552, 66804, 67054, 67304,
+	    67553, 67801, 68048, 68294, 68539, 68784, 69027, 69270, 69511, 69752, 69992, 70232,
+	    70470, 70708, 70945, 71181, 71416, 71651, 71885, 72118, 72350, 72581, 72812, 73042,
+	    73271, 73500, 73728, 73955, 74182, 74408, 74633, 74857, 75081, 75304, 75527, 75748,
+	    75969, 76190, 76410, 76629, 76848, 77066, 77283, 77500, 77716, 77932, 78147, 78361,
+	    78575, 78788, 79001, 79213, 79424, 79635, 79846, 80056, 80265, 80474, 80682, 80890,
+	    81097, 81303, 81509, 81715, 81920, 82125, 82329, 82532, 82735, 82938, 83140, 83341,
+	    83542, 83743, 83943, 84143, 84342, 84540, 84739, 84936, 85134, 85331, 85527, 85723,
+	    85918, 86113, 86308, 86502, 86696, 86889, 87082, 87275, 87467, 87658, 87849, 88040,
+	    88231, 88420, 88610, 88799, 88988, 89176, 89364, 89552, 89739, 89926, 90112, 90298,
+	    90484, 90669, 90854, 91038, 91222, 91406, 91589, 91772, 91955, 92137, 92319, 92501,
+	    92682};
+	static const int8_t iv[256] = {
+	    8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+	    3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+	    2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+	    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+	    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+	int32_t xfi_tmp;
+	int32_t y;
+	uint16_t v;
+	int32_t a_i;
+	int32_t l1_i;
+	int32_t l2_i;
+	int num_left_shifts;
+	int shift_factor;
+	int xfi;
+	unsigned int slice_temp;
+	int sign = 1;
+
+	if (!u)
+		return 0;
+	/* Normalize the input such that u = x * 2^n and 0.5 <= x < 2
+	 * Normalize to the range [1, 2)
+	 * normalizes the input U
+	 * such that the output X is
+	 * U = X*2^N
+	 * 1 <= X < 2
+	 * The output X is unsigned with one integer bit.
+	 * The input U must be scalar and positive.
+	 * The number of bits in a byte is assumed to be B=8.
+	 * Reinterpret the input as an unsigned integer.
+	 * Unroll the loop in generated code so there will be no branching.
+	 * For each iteration, see how many leading zeros are in the high
+	 * byte of V, and shift them out to the left. Continue with the
+	 * shifted V for as many bytes as it has.
+	 * The index is the high byte of the input plus 1 to make it a
+	 * one-based index.
+	 * Index into the number-of-leading-zeros lookup table.  This lookup
+	 * table takes in a byte and returns the number of leading zeros in the
+	 * binary representation.
+	 */
+
+	shift_factor = iv[u >> 8];
+	/*  Left-shift out all the leading zeros in the high byte. */
+	v = u << shift_factor;
+	/*  Update the total number of left-shifts */
+	num_left_shifts = shift_factor;
+	/* For each iteration, see how many leading zeros are in the high
+	 * byte of V, and shift them out to the left. Continue with the
+	 * shifted V for as many bytes as it has.
+	 * The index is the high byte of the input plus 1 to make it a
+	 * one-based index.
+	 * Index into the number-of-leading-zeros lookup table.  This lookup
+	 * table takes in a byte and returns the number of leading zeros in the
+	 * binary representation.
+	 */
+	shift_factor = iv[v >> 8];
+	/*  Left-shift out all the leading zeros in the high byte.
+	 *  Update the total number of left-shifts
+	 */
+	num_left_shifts += shift_factor;
+	/*  The input has been left-shifted so the most-significant-bit is a 1.
+	 *  Reinterpret the output as unsigned with one integer bit, so
+	 *  that 1 <= x < 2.
+	 *  Let Q = int(u).  Then u = Q*2^(-u_fraction_length),
+	 *  and x = Q*2^num_left_shifts * 2^(1-word_length).  Therefore,
+	 *  u = x*2^n, where n is defined as:
+	 */
+	xfi_tmp = (3 - num_left_shifts) & 1;
+	v = (v << shift_factor) >> xfi_tmp;
+	/*  Extract the high byte of x */
+	/*  Convert the high byte into an index for SQRTLUT */
+	slice_temp = SQRT_WRAP_SCHAR_BITS & (v >> 8);
+	/*  The upper byte was used for the index into SQRTLUT.
+	 *  The remainder, r, interpreted as a fraction, is used to
+	 *  linearly interpolate between points.
+	 */
+	a_i = iv1[((v >> 8) - 63) - 1];
+	a_i <<= 8;
+	l1_i = iv1[SQRT_WRAP_SCHAR_BITS & ((slice_temp + 194) - 1)];
+	l2_i = iv1[(slice_temp - 63) - 1];
+	y = a_i + (v & SQRT_WRAP_SCHAR_BITS) * (l1_i - l2_i);
+	xfi = (((xfi_tmp - num_left_shifts) + 3) >> 1);
+	shift_factor = (((xfi_tmp - num_left_shifts) + 3) >> 1);
+	if (xfi != 0) {
+		if (xfi > 0)
+			y <<= (shift_factor >= 32) ? 0 : shift_factor;
+		else
+			y >>= -sign * xfi;
+	}
+	y = ((y >> 11) + 1) >> 1;
+
+	return y;
+}
@@ -9,3 +9,8 @@ cmocka_test(base2_logarithm
 	base2_logarithm.c
 	${PROJECT_SOURCE_DIR}/src/math/base2log.c
 )
+
+cmocka_test(square_root
+	square_root.c
+	${PROJECT_SOURCE_DIR}/src/math/sqrt_int16.c
+)
@@ -0,0 +1,162 @@
+// SPDX-License-Identifier: BSD-3-Clause
+//
+// Copyright(c) 2021 Intel Corporation. All rights reserved.
+//
+// Author: Shriram Shastry <malladi.sastry@linux.intel.com>
+
+#include <stdio.h>
+#include <stdint.h>
+#include <stdarg.h>
+#include <stddef.h>
+#include <setjmp.h>
+#include <math.h>
+#include <cmocka.h>
+
+#include <sof/math/sqrt.h>
+#include <sof/audio/format.h>
+#include <sof/string.h>
+#include <sof/common.h>
+
+/* 'Error[max] = 0.0003000860000000,THD(-dBc) = -87.1210823527511309' */
+#define CMP_TOLERANCE 0.0001689942
+
+static const double sqrt_ref_table[] = {
+	    0.0000000000000000, 0.2529127196287289, 0.3580137261684250, 0.4383362543470480,
+	    0.5060667106469264, 0.5657458434447398, 0.6199010757774179, 0.6695089151758922,
+	    0.7156864056624241, 0.7590598625931949, 0.8002380017063674, 0.8392530626247365,
+	    0.8765332548597343, 0.9124250825410271, 0.9468285879714448, 0.9800251112854200,
+	    1.0121334212938529, 1.0433710015497843, 1.0735864616438677, 1.1029744939820685,
+	    1.1317074351394887, 1.1596234572049671, 1.1868830634270588, 1.2135304899342250,
+	    1.2397036881347898, 1.2652391387105444, 1.2902693214499832, 1.3148230929007141,
+	    1.3390178303517843, 1.3626935069009465, 1.3859648038460428, 1.4089384024417106,
+	    1.4314580907679415, 1.4536289448738284, 1.4754666899408471, 1.4970674458754356,
+	    1.5182805344369004, 1.5392012945030940, 1.5598414883410430, 1.5802893574042065,
+	    1.6003997303408295, 1.6202605162827983, 1.6399552204252408, 1.6593426315110451,
+	    1.6785061252494731, 1.6974532854396907, 1.7162624043615824, 1.7347972458979175,
+	    1.7531361407845656, 1.7712851751976586, 1.7893183496097054, 1.8071040368501201,
+	    1.8247163735084968, 1.8422265952170487, 1.8595062978852748, 1.8766268983537990,
+	    1.8935927121149891, 1.9104717594746068, 1.9271396388170734, 1.9436645881555799,
+	    1.9601125007572908, 1.9763617734046062, 1.9924785326635266, 2.0084659685628234,
+	    2.0243874459327196, 2.0401248429936829, 2.0557417684986605, 2.0712409474756917,
+	    2.0866835042418388, 2.1019545405705138, 2.1171154277400652, 2.1322257663648099,
+	    2.1471729232877355, 2.1620167451363552, 2.1767593459084997, 2.1914584719713490,
+	    2.2060043241401410, 2.2204548907543695, 2.2348120201987909, 2.2491317769308226,
+	    2.2633070038662453, 2.2773940013752560, 2.2914476694602910, 2.3053627188850347,
+	    2.3191942802134968, 2.3329438383992445, 2.3466648541067809, 2.3602543890966499,
+	    2.3737661268541177, 2.3872013883939496, 2.4006123079591588, 2.4138981537908761,
+	    2.4271112748749282, 2.4403028756693299, 2.4533737931163282, 2.4663754402969551,
+	    2.4793089069839604, 2.4922242356226696, 2.5050242482608827, 2.5177591878742098,
+	    2.5304782774210888, 2.5430857547967194, 2.5556310375326090, 2.5681150370943278,
+	    2.5805859466650203, 2.5929498012639969, 2.6052549809231724, 2.6175023131556161,
+	    2.6297390257875399, 2.6418728560436060, 2.6539512111660981, 2.6660206330081166,
+	    2.6779900782816579, 2.6899062628881700, 2.7017698915479462, 2.7136266381449752,
+	    2.7253870138018930, 2.7370968595849874, 2.7487568212739375, 2.7604117531402812,
+	    2.7719736453698474, 2.7834875128828940, 2.7949976241045360, 2.8064170328908711,
+	    2.8177901636300033, 2.8291175744390689, 2.8404427884354582, 2.8516802201728368,
+	    2.8628735427669523, 2.8740232715872360, 2.8851722218959477, 2.8962361080806240,
+	    2.9072578897476569, 2.9182798738083706, 2.9292187124939990, 2.9401168530136688,
+	    2.9509747462702896, 2.9618340496219568, 2.9726126187665289, 2.9833522462156559,
+	    2.9940941216626773, 3.0047569707257522, 3.0153821145710538, 3.0259699503836783,
+	    3.0365610688738007, 3.0470753139280951, 3.0575534030495688, 3.0679957066870220,
+	    3.0784422425351754, 3.0888139284157279, 3.0991509043809078, 3.1094927741514371,
+	    3.1197612338526808, 3.1299960063872287, 3.1401974211424988, 3.1504045499149789,
+	    3.1605400917999757, 3.1706432337342845, 3.1807142844611178, 3.1907918051402224,
+	    3.2007994606816590, 3.2107759235502562, 3.2207593849316032, 3.2306742112715421,
+	    3.2405587023112234, 3.2504131347991749, 3.2602752232365293, 3.2700702400713046,
+	    3.2798360048560355, 3.2895727781126838, 3.2993178153786578, 3.3089972636170311,
+	    3.3186484800856810, 3.3283083869662677, 3.3379037677111065, 3.3474716438306089,
+	    3.3570122504989461, 3.3665620793882591, 3.3760487375221642, 3.3855088128485207,
+	    3.3949784839156196, 3.4043859578490805, 3.4137675072784321, 3.4231233453528955,
+	    3.4324892457042018, 3.4417941928048226, 3.4510740515635128, 3.4603290238249023,
+	    3.4695944917958350, 3.4788001927748020, 3.4879815975718680, 3.4971738031409019,
+	    3.5063070962374359, 3.5154166604934614, 3.5245026799003858, 3.5335998818485379,
+	    3.5426392659640071, 3.5516556438511886, 3.5606491902811768, 3.5696542746637245,
+	    3.5786025882144274, 3.5875285822032135, 3.5964663647113397, 3.6053481324623839,
+	    3.6142080737002402, 3.6230463485511746, 3.6318967259718442, 3.6406920594682268,
+	    3.6494661959833250, 3.6582526566654741, 3.6669847755001657, 3.6756961500510350,
+	    3.6843869274616097, 3.6930903069956198, 3.7017402474207994, 3.7103700224000571,
+	    3.7189797723132347, 3.7276023837381045, 3.7361724230822109, 3.7447228493908598,
+	    3.7532863204297375, 3.7617978476886553, 3.7702901600042669, 3.7787633869263368,
+	    3.7872498886065071, 3.7956852559847478, 3.8041019184887777, 3.8125000000000000,
+	    3.8209115711273665, 3.8292727871131094, 3.8376157861196840, 3.8459724266107265,
+	    3.8542792776341468, 3.8625682639598748, 3.8708395003538962, 3.8791245690071618,
+	    3.8873605782876637, 3.8955791750874478, 3.9037804693815712, 3.9119957742180653,
+	    3.9201627238228260, 3.9283126944020128, 3.9364768015796816, 3.9445930655930783,
+	    3.9526926641056979, 3.9607756993580185, 3.9688730295891301, 3.9769231786332004,
+	    3.9849570653270532, 3.9930053589840071, 3.9999694823054588, 3.9999694823054588};
+/* testvector in Q4.12 */
+static const uint32_t uv[252] = {
+	    0U,		  262U,		525U,	      787U,	    1049U,	  1311U,
+	    1574U,	  1836U,	2098U,	      2360U,	    2623U,	  2885U,
+	    3147U,	  3410U,	3672U,	      3934U,	    4196U,	  4459U,
+	    4721U,	  4983U,	5246U,	      5508U,	    5770U,	  6032U,
+	    6295U,	  6557U,	6819U,	      7081U,	    7344U,	  7606U,
+	    7868U,	  8131U,	8393U,	      8655U,	    8917U,	  9180U,
+	    9442U,	  9704U,	9966U,	      10229U,	    10491U,	  10753U,
+	    11016U,	  11278U,	11540U,	      11802U,	    12065U,	  12327U,
+	    12589U,	  12851U,	13114U,	      13376U,	    13638U,	  13901U,
+	    14163U,	  14425U,	14687U,	      14950U,	    15212U,	  15474U,
+	    15737U,	  15999U,	16261U,	      16523U,	    16786U,	  17048U,
+	    17310U,	  17572U,	17835U,	      18097U,	    18359U,	  18622U,
+	    18884U,	  19146U,	19408U,	      19671U,	    19933U,	  20195U,
+	    20457U,	  20720U,	20982U,	      21244U,	    21507U,	  21769U,
+	    22031U,	  22293U,	22556U,	      22818U,	    23080U,	  23342U,
+	    23605U,	  23867U,	24129U,	      24392U,	    24654U,	  24916U,
+	    25178U,	  25441U,	25703U,	      25965U,	    26228U,	  26490U,
+	    26752U,	  27014U,	27277U,	      27539U,	    27801U,	  28063U,
+	    28326U,	  28588U,	28850U,	      29113U,	    29375U,	  29637U,
+	    29899U,	  30162U,	30424U,	      30686U,	    30948U,	  31211U,
+	    31473U,	  31735U,	31998U,	      32260U,	    32522U,	  32784U,
+	    33047U,	  33309U,	33571U,	      33833U,	    34096U,	  34358U,
+	    34620U,	  34883U,	35145U,	      35407U,	    35669U,	  35932U,
+	    36194U,	  36456U,	36719U,	      36981U,	    37243U,	  37505U,
+	    37768U,	  38030U,	38292U,	      38554U,	    38817U,	  39079U,
+	    39341U,	  39604U,	39866U,	      40128U,	    40390U,	  40653U,
+	    40915U,	  41177U,	41439U,	      41702U,	    41964U,	  42226U,
+	    42489U,	  42751U,	43013U,	      43275U,	    43538U,	  43800U,
+	    44062U,	  44324U,	44587U,	      44849U,	    45111U,	  45374U,
+	    45636U,	  45898U,	46160U,	      46423U,	    46685U,	  46947U,
+	    47210U,	  47472U,	47734U,	      47996U,	    48259U,	  48521U,
+	    48783U,	  49045U,	49308U,	      49570U,	    49832U,	  50095U,
+	    50357U,	  50619U,	50881U,	      51144U,	    51406U,	  51668U,
+	    51930U,	  52193U,	52455U,	      52717U,	    52980U,	  53242U,
+	    53504U,	  53766U,	54029U,	      54291U,	    54553U,	  54816U,
+	    55078U,	  55340U,	55602U,	      55865U,	    56127U,	  56389U,
+	    56651U,	  56914U,	57176U,	      57438U,	    57701U,	  57963U,
+	    58225U,	  58487U,	58750U,	      59012U,	    59274U,	  59536U,
+	    59799U,	  60061U,	60323U,	      60586U,	    60848U,	  61110U,
+	    61372U,	  61635U,	61897U,	      62159U,	    62421U,	  62684U,
+	    62946U,	  63208U,	63471U,	      63733U,	    63995U,	  64257U,
+	    64520U,	  64782U,	65044U,	      65307U,	    UINT16_MAX, UINT16_MAX};
+static void test_math_arithmetic_sqrt_fixed(void **state)
+{
+	(void)state;
+
+	uint32_t u[252];
+	int i;
+	double y;
+	double diff;
+
+	memcpy_s((void *)&u[0], sizeof(u), (void *)&uv[0], 252U * sizeof(uint32_t));
+	for (i = 0; i < ARRAY_SIZE(sqrt_ref_table); i++) {
+		y = Q_CONVERT_QTOF(sqrt_int16(u[i]), 12);
+		diff = fabs(sqrt_ref_table[i] - y);
+
+		if (diff > CMP_TOLERANCE) {
+			printf("%s: diff for %.16f: reftbl = %.16f, sqrt = %.16f\n", __func__,
+			       diff, (double)sqrt_ref_table[i], y);
+			assert_true(diff <= CMP_TOLERANCE);
+		}
+	}
+}
+
+int main(void)
+{
+	const struct CMUnitTest tests[] = {
+		cmocka_unit_test(test_math_arithmetic_sqrt_fixed)
+	};
+
+	cmocka_set_message_output(CM_OUTPUT_TAP);
+
+	return cmocka_run_group_tests(tests, NULL, NULL);
+}