From 3ba94c6b0bd0584bf0fef0d4b7e22c9cc21a1ec6 Mon Sep 17 00:00:00 2001
From: shastry <malladi.sastry@intel.com>
Date: Mon, 4 Dec 2023 18:21:42 +0530
Subject: [PATCH 1/5] Math: Exp: Rename and move common macros for generic and
 HiFi

The macros common to the generic and HiFi versions are renamed with the
SOFM_EXP_ prefix and moved to the header file. There are no functional
changes.

Signed-off-by: shastry <malladi.sastry@intel.com>
---
 src/include/sof/math/exp_fcn.h | 38 +++++++++++++++++++++++++++++++---
 src/math/exp_fcn.c             | 17 ++++++---------
 src/math/exp_fcn_hifi.c        | 18 +++++++---------
 3 files changed, 48 insertions(+), 25 deletions(-)

diff --git a/src/include/sof/math/exp_fcn.h b/src/include/sof/math/exp_fcn.h
index 74466f6bfb3a..50315b45a84a 100644
--- a/src/include/sof/math/exp_fcn.h
+++ b/src/include/sof/math/exp_fcn.h
@@ -5,8 +5,8 @@
  * Author: Shriram Shastry <malladi.sastry@linux.intel.com>
  *
  */
-#ifndef __SOFM_EXP_H__
-#define __SOFM_EXP_H__
+#ifndef __SOFM_EXP_FCN_H__
+#define __SOFM_EXP_FCN_H__
 
 #include <stdint.h>
 
@@ -26,6 +26,38 @@
 
 #endif
 
-int32_t sofm_exp_int32(int32_t x);
+/* TODO: Check if there is an MCPS difference between Q_CONVERT_FLOAT() and literals */
+#define USING_QCONVERT	1
+
+#if USING_QCONVERT
+
+#include <sof/audio/format.h>
+#define SOFM_EXP_FIXED_INPUT_MIN	Q_CONVERT_FLOAT(-11.5, 27)		/* Q5.27 */
+#define SOFM_EXP_FIXED_INPUT_MAX	Q_CONVERT_FLOAT(7.6245, 27)		/* Q5.27 */
+#define SOFM_EXP_TWO_Q27		Q_CONVERT_FLOAT(2.0, 27)		/* Q5.27 */
+#define SOFM_EXP_MINUS_TWO_Q27		Q_CONVERT_FLOAT(-2.0, 27)		/* Q5.27 */
+#define SOFM_EXP_ONE_Q20		Q_CONVERT_FLOAT(1.0, 20)		/* Q12.20 */
+#define SOFM_EXP_MINUS_100_Q24		Q_CONVERT_FLOAT(-100.0, 24)		/* Q8.24 */
+#define SOFM_EXP_LOG10_DIV20_Q27	Q_CONVERT_FLOAT(0.1151292546, 27)	/* Q5.27 */
+
+#else
+
+#define SOFM_EXP_FIXED_INPUT_MIN	-1543503872	/* Q_CONVERT_FLOAT(-11.5, 27) */
+#define SOFM_EXP_FIXED_INPUT_MAX	 1023343067	/* Q_CONVERT_FLOAT(7.6245, 27) */
+#define SOFM_EXP_TWO_Q27		 268435456	/* Q_CONVERT_FLOAT(2.0, 27) */
+#define SOFM_EXP_MINUS_TWO_Q27		-268435456	/* Q_CONVERT_FLOAT(-2.0, 27) */
+#define SOFM_EXP_ONE_Q20		 1048576	/* Q_CONVERT_FLOAT(1.0, 20) */
+#define SOFM_EXP_MINUS_100_Q24		-1677721600	/* Q_CONVERT_FLOAT(-100.0, 24) */
+#define SOFM_EXP_LOG10_DIV20_Q27	 15452387	/* Q_CONVERT_FLOAT(0.1151292546, 27) */
 
 #endif
+
+#define SOFM_EXP_BIT_MASK_LOW_Q27P5 0x0000000008000000
+#define SOFM_EXP_BIT_MASK_Q62P2 0x4000000000000000LL
+#define SOFM_EXP_QUOTIENT_SCALE 0x40000000
+#define SOFM_EXP_TERMS_Q23P9 0x800000
+#define SOFM_EXP_LSHIFT_BITS 0x2000
+
+int32_t sofm_exp_int32(int32_t x);
+
+#endif /* __SOFM_EXP_FCN_H__ */
diff --git a/src/math/exp_fcn.c b/src/math/exp_fcn.c
index ada4b573e4f1..ef218666cb90 100644
--- a/src/math/exp_fcn.c
+++ b/src/math/exp_fcn.c
@@ -17,12 +17,7 @@
 
 #if defined(EXPONENTIAL_GENERIC)
 
-#define SOFM_BIT_MASK_Q62P2 0x4000000000000000LL
 #define SOFM_CONVERG_ERROR 28823037607936LL	// error smaller than 1e-4,1/2 ^ -44.7122876200884
-#define SOFM_BIT_MASK_LOW_Q27P5 0x8000000
-#define SOFM_QUOTIENT_SCALE BIT(30)
-#define SOFM_TERMS_Q23P9 8388608
-#define SOFM_LSHIFT_BITS 8192
 
 /* inv multiplication lookup table */
 /* LUT = ceil(1/factorial(b_n) * 2 ^ 63) */
@@ -155,7 +150,7 @@ static inline int64_t lomul_s64_sr_sat_near(int64_t a, int64_t b)
 	uint64_t u64_rlo;
 
 	mul_s64(a, b, &u64_rhi, &u64_rlo);
-	const bool roundup = (u64_rlo & SOFM_BIT_MASK_LOW_Q27P5) != 0;
+	const bool roundup = (u64_rlo & SOFM_EXP_BIT_MASK_LOW_Q27P5) != 0;
 
 	u64_rlo = (u64_rhi << 36 | u64_rlo >> 28) + (roundup ? 1 : 0);
 	return u64_rlo;
@@ -182,8 +177,8 @@ int32_t sofm_exp_int32(int32_t x)
 	uint64_t ou0Lo;
 	int64_t qt;
 	int32_t b_n;
-	int32_t ts =  SOFM_TERMS_Q23P9; /* Q23.9 */
-	int64_t dividend = (x + SOFM_LSHIFT_BITS) >> 14; /* x in Q50.14 */
+	int32_t ts =  SOFM_EXP_TERMS_Q23P9; /* Q23.9 */
+	int64_t dividend = (x + SOFM_EXP_LSHIFT_BITS) >> 14; /* x in Q50.14 */
 	static const int32_t i_emin = -1342177280; /* Q4.28 */
 	static const int32_t o_emin = 56601; /* Q9.23 */
 	static const int32_t i_emax = 1342177280; /* Q4.28 */
@@ -198,16 +193,16 @@ int32_t sofm_exp_int32(int32_t x)
 		return o_emax; /* 148.4131494760513306 in Q9.23 */
 
 	/* pre-computation of 1st & 2nd terms */
-	mul_s64(dividend, SOFM_BIT_MASK_Q62P2, &ou0Hi, &ou0Lo);
+	mul_s64(dividend, SOFM_EXP_BIT_MASK_Q62P2, &ou0Hi, &ou0Lo);
 	qt = (ou0Hi << 46) | (ou0Lo >> 18);/* Q6.26 */
-	ts += (int32_t)((qt >> 35) + ((qt & SOFM_QUOTIENT_SCALE) >> 18));
+	ts += (int32_t)((qt >> 35) + ((qt & SOFM_EXP_QUOTIENT_SCALE) >> 18));
 	dividend = lomul_s64_sr_sat_near(dividend, x);
 	for (b_n = 0; b_n < ARRAY_SIZE(exp_iv_ilookup); b_n++) {
 		mul_s64(dividend, exp_iv_ilookup[b_n], &ou0Hi, &ou0Lo);
 		qt = (ou0Hi << 45) | (ou0Lo >> 19);
 
 		/* sum of the remaining terms */
-		ts += (int32_t)((qt >> 35) + ((qt & SOFM_QUOTIENT_SCALE) ? 1 : 0));
+		ts += (int32_t)((qt >> 35) + ((qt & SOFM_EXP_QUOTIENT_SCALE) ? 1 : 0));
 		dividend = lomul_s64_sr_sat_near(dividend, x);
 
 		qt  = ABS(qt);
diff --git a/src/math/exp_fcn_hifi.c b/src/math/exp_fcn_hifi.c
index 2a65d4932864..73331877e70f 100644
--- a/src/math/exp_fcn_hifi.c
+++ b/src/math/exp_fcn_hifi.c
@@ -27,11 +27,7 @@
 #endif
 
 #define SOFM_CONVERG_ERROR 28823037624320LL /* error smaller than 1e-4,1/2 ^ -44.7122876209085 */
-#define SOFM_BIT_MASK_LOW_Q27P5 0x0000000008000000
-#define SOFM_BIT_MASK_Q62P2 0x4000000000000000LL
-#define SOFM_QUOTIENT_SCALE BIT(30)
-#define SOFM_TERMS_Q23P9 0x800000
-#define SOFM_LSHIFT_BITS 0x2000
+
 /*
  * Arguments	: int64_t in_0
  *		  int64_t in_1
@@ -186,7 +182,7 @@ static int64_t lomul_s64_sr_sat_near(int64_t a, int64_t b)
 
 	mul_s64(a, b, &u64_chi, &u64_clo);
 
-	ae_int64 roundup = AE_AND64(u64_clo, SOFM_BIT_MASK_LOW_Q27P5);
+	ae_int64 roundup = AE_AND64(u64_clo, SOFM_EXP_BIT_MASK_LOW_Q27P5);
 
 	roundup = AE_SRLI64(roundup, 27);
 	temp = AE_OR64(AE_SLAI64(u64_chi, 36), AE_SRLI64(u64_clo, 28));
@@ -240,15 +236,15 @@ int32_t sofm_exp_int32(int32_t x)
 	ae_int64 temp;
 
 	ae_int64 *ponebyfact_Q63 = &onebyfact_Q63[0];
-	ae_int64 ts = SOFM_TERMS_Q23P9;
-	ae_int64 mp = (x + SOFM_LSHIFT_BITS) >> 14; /* x in Q50.14 */;
+	ae_int64 ts = SOFM_EXP_TERMS_Q23P9;
+	ae_int64 mp = (x + SOFM_EXP_LSHIFT_BITS) >> 14; /* x in Q50.14 */;
 	xtbool flag;
 	int64_t b_n;
 
-	mul_s64(mp, SOFM_BIT_MASK_Q62P2, &outhi, &outlo);
+	mul_s64(mp, SOFM_EXP_BIT_MASK_Q62P2, &outhi, &outlo);
 	qt = AE_OR64(AE_SLAI64(outhi, 46), AE_SRLI64(outlo, 18));
 
-	temp = AE_SRAI64(AE_ADD64(qt, SOFM_QUOTIENT_SCALE), 35);
+	temp = AE_SRAI64(AE_ADD64(qt, SOFM_EXP_QUOTIENT_SCALE), 35);
 
 	ts = AE_ADD64(ts, temp);
 
@@ -260,7 +256,7 @@ int32_t sofm_exp_int32(int32_t x)
 		mul_s64(mp, onebyfact, &outhi, &outlo);
 		qt = AE_OR64(AE_SLAI64(outhi, 45), AE_SRLI64(outlo, 19));
 
-		temp = AE_SRAI64(AE_ADD64(qt, SOFM_QUOTIENT_SCALE), 35);
+		temp = AE_SRAI64(AE_ADD64(qt, SOFM_EXP_QUOTIENT_SCALE), 35);
 		ts = AE_ADD64(ts, temp);
 
 		mp = lomul_s64_sr_sat_near(mp, (int64_t)x);

From 45a97a7bb444c8e1a33e11543c832a0a20ff0a20 Mon Sep 17 00:00:00 2001
From: shastry <malladi.sastry@intel.com>
Date: Mon, 4 Dec 2023 21:46:07 +0530
Subject: [PATCH 2/5] Math: Exp: Fix warning: incompatible pointer type
 initialization.

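The variable 'productlo' is not used by the HiFi4/5 code, so its
declaration is moved to the HiFi3 code that uses it. The lookup table
onebyfact_Q63 is changed to const int64_t and cast to an ae_int64
pointer where it is used. If the variable 'ret' is used uninitialized
whenever the 'if' condition is false, set it to false.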

Signed-off-by: shastry <malladi.sastry@intel.com>
---
 src/math/exp_fcn_hifi.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/math/exp_fcn_hifi.c b/src/math/exp_fcn_hifi.c
index 73331877e70f..72fee29de52f 100644
--- a/src/math/exp_fcn_hifi.c
+++ b/src/math/exp_fcn_hifi.c
@@ -85,7 +85,7 @@ static void mul_s64(ae_int64 in_0, ae_int64 in_1, ae_int64 *__restrict__ ptroutb
 		    ae_int64 *__restrict__ ptroutbitslo)
 {
 	ae_int64 producthihi, producthilo, productlolo;
-	ae_int64 producthi, productlo, product_hl_lh_h, product_hl_lh_l, carry;
+	ae_int64 producthi, product_hl_lh_h, product_hl_lh_l, carry;
 
 #if (SOFM_EXPONENTIAL_HIFI4 == 1 || SOFM_EXPONENTIAL_HIFI5 == 1)
 
@@ -126,6 +126,7 @@ static void mul_s64(ae_int64 in_0, ae_int64 in_1, ae_int64 *__restrict__ ptroutb
 	ae_int64 producthi_1c;
 	ae_int64 producthi_2c;
 	ae_int64 productlo_2c;
+	ae_int64 productlo;
 
 	ae_int64 s0 = AE_SRLI64(in_0, 63);
 	ae_int64 s1 = AE_SRLI64(in_1, 63);
@@ -190,7 +191,7 @@ static int64_t lomul_s64_sr_sat_near(int64_t a, int64_t b)
 	return AE_ADD64(temp, roundup);
 }
 
-static ae_int64 onebyfact_Q63[19] = {
+static const int64_t onebyfact_Q63[19] = {
 		4611686018427387904LL,
 		1537228672809129301LL,
 		384307168202282325LL,
@@ -235,7 +236,7 @@ int32_t sofm_exp_int32(int32_t x)
 	ae_int64 onebyfact;
 	ae_int64 temp;
 
-	ae_int64 *ponebyfact_Q63 = &onebyfact_Q63[0];
+	ae_int64 *ponebyfact_Q63 = (ae_int64 *)onebyfact_Q63;
 	ae_int64 ts = SOFM_EXP_TERMS_Q23P9;
 	ae_int64 mp = (x + SOFM_EXP_LSHIFT_BITS) >> 14; /* x in Q50.14 */;
 	xtbool flag;

From 9405cbb5b10c41cb415b91b0bd8b80893faf3d40 Mon Sep 17 00:00:00 2001
From: Seppo Ingalsuo <seppo.ingalsuo@linux.intel.com>
Date: Fri, 5 Jan 2024 20:40:38 +0200
Subject: [PATCH 3/5] Math: Exp: Add functions sofm_exp_fixed and
 sofm_db2lin_fixed

This change allows the fast exponent library to replace
the decibels library for applications like DRC where the exponent
function is used in hot code paths.

Signed-off-by: Seppo Ingalsuo <seppo.ingalsuo@linux.intel.com>
Signed-off-by: shastry <malladi.sastry@intel.com>
---
 src/include/sof/math/exp_fcn.h |  2 +
 src/math/exp_fcn.c             | 73 +++++++++++++++++++++++++-
 src/math/exp_fcn_hifi.c        | 96 ++++++++++++++++++++++++++++++++++
 3 files changed, 170 insertions(+), 1 deletion(-)

diff --git a/src/include/sof/math/exp_fcn.h b/src/include/sof/math/exp_fcn.h
index 50315b45a84a..425f400b23de 100644
--- a/src/include/sof/math/exp_fcn.h
+++ b/src/include/sof/math/exp_fcn.h
@@ -59,5 +59,7 @@
 #define SOFM_EXP_LSHIFT_BITS 0x2000
 
 int32_t sofm_exp_int32(int32_t x);
+int32_t sofm_exp_fixed(int32_t x);
+int32_t sofm_db2lin_fixed(int32_t db);
 
 #endif /* __SOFM_EXP_FCN_H__ */
diff --git a/src/math/exp_fcn.c b/src/math/exp_fcn.c
index ef218666cb90..2bace8a430cc 100644
--- a/src/math/exp_fcn.c
+++ b/src/math/exp_fcn.c
@@ -6,6 +6,7 @@
  *
  */
 
+#include <sof/audio/format.h>
 #include <sof/math/exp_fcn.h>
 #include <sof/math/numbers.h>
 #include <sof/common.h>
@@ -212,4 +213,74 @@ int32_t sofm_exp_int32(int32_t x)
 	}
 	return ts;
 }
-#endif
+
+/* Fixed point exponent function for approximate range -11.5 .. 7.6
+ * that corresponds to decibels range -100 .. +66 dB.
+ *
+ * The function uses the rule exp(x) = exp(x/2) * exp(x/2) to reduce
+ * the input argument for private small value exp() function that is
+ * accurate with input range -2.0 .. +2.0. The number of possible
+ * divisions by 2 is computed into variable n. The returned value is
+ * exp()^(2^n).
+ *
+ * Input  is Q5.27, -16.0 .. +16.0, but note the input range limitation
+ * Output is Q12.20, 0.0 .. +2048.0
+ */
+
+int32_t sofm_exp_fixed(int32_t x)
+{
+	int32_t xs;
+	int32_t y;
+	int32_t y0;
+	int i;
+	int n = 0;
+
+	if (x < SOFM_EXP_FIXED_INPUT_MIN)
+		return 0;
+
+	if (x > SOFM_EXP_FIXED_INPUT_MAX)
+		return INT32_MAX;
+
+	/* x is Q5.27 */
+	xs = x;
+	while (xs >= SOFM_EXP_TWO_Q27 || xs <= SOFM_EXP_MINUS_TWO_Q27) {
+		xs >>= 1;
+		n++;
+	}
+
+	/* sofm_exp_int32() input is Q4.28, while xs is Q5.27
+	 * sofm_exp_int32() output is Q9.23, while y0 is Q12.20
+	 */
+	y0 = Q_SHIFT_RND(sofm_exp_int32(Q_SHIFT_LEFT(xs, 27, 28)), 23, 20);
+	y = SOFM_EXP_ONE_Q20;
+	for (i = 0; i < (1 << n); i++)
+		y = (int32_t)Q_MULTSR_32X32((int64_t)y, y0, 20, 20, 20);
+
+	return y;
+}
+
+#endif /* EXPONENTIAL_GENERIC */
+
+/* Decibels to linear conversion: The function uses exp() to calculate
+ * the linear value. The argument is multiplied by log(10)/20 to
+ * calculate equivalent of 10^(db/20).
+ *
+ * The error in conversion is less than 0.1 dB for -89..+66 dB range. Do not
+ * use the code for argument less than -100 dB. The code simply returns zero
+ * as linear value for such very small value.
+ *
+ * Input is Q8.24 (max 128.0)
+ * output is Q12.20 (max 2048.0)
+ */
+
+int32_t sofm_db2lin_fixed(int32_t db)
+{
+	int32_t arg;
+
+	if (db < SOFM_EXP_MINUS_100_Q24)
+		return 0;
+
+	/* Q8.24 x Q5.27, result needs to be Q5.27 */
+	arg = (int32_t)Q_MULTSR_32X32((int64_t)db, SOFM_EXP_LOG10_DIV20_Q27, 24, 27, 27);
+	return sofm_exp_fixed(arg);
+}
diff --git a/src/math/exp_fcn_hifi.c b/src/math/exp_fcn_hifi.c
index 72fee29de52f..f0a263b76873 100644
--- a/src/math/exp_fcn_hifi.c
+++ b/src/math/exp_fcn_hifi.c
@@ -26,6 +26,9 @@
 #include <xtensa/tie/xt_hifi3.h>
 #endif
 
+#include <xtensa/tie/xt_hifi2.h>
+#include <xtensa/tie/xt_FP.h>
+
 #define SOFM_CONVERG_ERROR 28823037624320LL /* error smaller than 1e-4,1/2 ^ -44.7122876209085 */
 
 /*
@@ -275,4 +278,97 @@ int32_t sofm_exp_int32(int32_t x)
 
 	return AE_MOVAD32_L(AE_MOVINT32X2_FROMINT64(ts));
 }
+
+/* Fractional multiplication with shift and round
+ * Arguments a (Qc format) and b (Qd format) are plain int, the result is in Qe format.
+ */
+static inline int exp_hifi_q_multsr_32x32(int a, int b, int c, int d, int e)
+{
+	ae_int64 res;
+	int xt_o;
+	int shift;
+
+	res = AE_MUL32_LL(a, b);
+	shift = XT_SUB(XT_ADD(c, d), XT_ADD(e, 1));
+	res = AE_SRAA64(res, shift);
+	res = AE_ADD64(res, 1);
+	res = AE_SRAI64(res, 1);
+	xt_o = AE_MOVINT32_FROMINT64(res);
+
+	return xt_o;
+}
+
+/* Q-shift with rounding, converts a from Qb format to Qc format */
+static inline int exp_hifi_q_shift_rnd(int a, int b, int c)
+{
+	ae_int32 res;
+	int shift;
+
+	shift = XT_SUB(b, XT_ADD(c, 1));
+	res = AE_SRAA32(a, shift);
+	res = AE_ADD32(res, 1);
+	res = AE_SRAI32(res, 1);
+
+	return res;
+}
+
+/* Alternative version since compiler does not allow (x >> -1) */
+static inline int exp_hifi_q_shift_left(int a, int b, int c)
+{
+	ae_int32 xt_o;
+	int shift;
+
+	shift = XT_SUB(c, b);
+	xt_o = AE_SLAA32(a, shift);
+
+	return xt_o;
+}
+
+#define q_mult(a, b, qa, qb, qy) ((int32_t)exp_hifi_q_multsr_32x32((int64_t)(a), b, qa, qb, qy))
+/* Fixed point exponent function for approximate range -11.5 .. 7.6
+ * that corresponds to decibels range -100 .. +66 dB.
+ *
+ * The function uses the rule exp(x) = exp(x/2) * exp(x/2) to reduce
+ * the input argument for private small value exp() function that is
+ * accurate with input range -2.0 .. +2.0. The number of possible
+ * divisions by 2 is computed into variable n. The returned value is
+ * exp()^(2^n).
+ *
+ * Input  is Q5.27, -16.0 .. +16.0, but note the input range limitation
+ * Output is Q12.20, 0.0 .. +2048.0
+ */
+
+int32_t sofm_exp_fixed(int32_t x)
+{
+	int32_t xs;
+	int32_t y;
+	int32_t y0;
+	int i;
+	int n = 0;
+
+	if (x < SOFM_EXP_FIXED_INPUT_MIN)
+		return 0;
+
+	if (x > SOFM_EXP_FIXED_INPUT_MAX)
+		return INT32_MAX;
+
+	/* x is Q5.27 */
+	xs = x;
+	while (xs >= SOFM_EXP_TWO_Q27 || xs <= SOFM_EXP_MINUS_TWO_Q27) {
+		xs >>= 1;
+		n++;
+	}
+
+	/* sofm_exp_int32() input is Q4.28, while xs is Q5.27
+	 * sofm_exp_int32() output is Q9.23, while y0 is Q12.20
+	 */
+	y0 = exp_hifi_q_shift_rnd(sofm_exp_int32(exp_hifi_q_shift_left(xs, 27, 28)),
+				  23, 20);
+	y = SOFM_EXP_ONE_Q20;
+	for (i = 0; i < (1 << n); i++)
+		y = (int32_t)exp_hifi_q_multsr_32x32((int64_t)y, y0, 20, 20, 20);
+
+	return y;
+}
+
 #endif

From 7a8430c6321fc4e340af55744b0071735ebb2d17 Mon Sep 17 00:00:00 2001
From: Seppo Ingalsuo <seppo.ingalsuo@linux.intel.com>
Date: Fri, 5 Jan 2024 20:56:17 +0200
Subject: [PATCH 4/5] Zephyr: Patch Zephyr CMakeLists with exponential source
 files

In Zephyr CMakeLists, add exponential source files to facilitate
the compilation of math C and HiFi code.

Signed-off-by: shastry <malladi.sastry@intel.com>
---
 zephyr/CMakeLists.txt | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/zephyr/CMakeLists.txt b/zephyr/CMakeLists.txt
index 0959e7012140..67315950fb10 100644
--- a/zephyr/CMakeLists.txt
+++ b/zephyr/CMakeLists.txt
@@ -370,6 +370,8 @@ zephyr_library_sources(
 	${SOF_MATH_PATH}/decibels.c
 	${SOF_MATH_PATH}/numbers.c
 	${SOF_MATH_PATH}/trig.c
+	${SOF_MATH_PATH}/exp_fcn.c
+	${SOF_MATH_PATH}/exp_fcn_hifi.c
 
 	# SOF library - parts to transition to Zephyr over time
 	${SOF_LIB_PATH}/clk.c

From 0589da0992f11cabefdea07cf1446f565e9a8965 Mon Sep 17 00:00:00 2001
From: shastry <malladi.sastry@intel.com>
Date: Tue, 5 Dec 2023 08:24:06 +0530
Subject: [PATCH 5/5] Audio: DRC: Use fast exponent functions

The exp_fixed() and db2lin_fixed() functions are replaced by the fast
sofm_exp_fixed() and sofm_db2lin_fixed() functions. It saves 40 MCPS,
from 123 to 83 MCPS, in a test run on the TGL platform.

Signed-off-by: shastry <malladi.sastry@intel.com>
---
 src/audio/drc/Kconfig            |  2 +-
 src/audio/drc/drc_generic.c      | 14 ++++++++------
 src/audio/drc/drc_hifi3.c        | 11 ++++++-----
 src/audio/drc/drc_hifi4.c        | 11 ++++++-----
 src/audio/drc/drc_math_generic.c |  3 ++-
 5 files changed, 23 insertions(+), 18 deletions(-)

diff --git a/src/audio/drc/Kconfig b/src/audio/drc/Kconfig
index c34da85e1bb5..985feede998a 100644
--- a/src/audio/drc/Kconfig
+++ b/src/audio/drc/Kconfig
@@ -4,7 +4,7 @@ config COMP_DRC
 	bool "Dynamic Range Compressor component"
 	select CORDIC_FIXED
 	select NUMBERS_NORM
-	select MATH_DECIBELS
+	select MATH_EXP
 	select COMP_BLOB
 	default n
 	help
diff --git a/src/audio/drc/drc_generic.c b/src/audio/drc/drc_generic.c
index 1ad0c50734a1..bc16fc670f99 100644
--- a/src/audio/drc/drc_generic.c
+++ b/src/audio/drc/drc_generic.c
@@ -6,7 +6,7 @@
 
 #include <sof/audio/component.h>
 #include <sof/audio/format.h>
-#include <sof/math/decibels.h>
+#include <sof/math/exp_fcn.h>
 #include <sof/math/numbers.h>
 #include <stdint.h>
 
@@ -36,7 +36,7 @@ static int32_t knee_curveK(const struct sof_drc_params *p, int32_t x)
 	 *	 beta = -expf(k * linear_threshold) / k
 	 *	 gamma = -k * x
 	 */
-	knee_exp_gamma = exp_fixed(Q_MULTSR_32X32((int64_t)x, -p->K, 31, 20, 27)); /* Q12.20 */
+	knee_exp_gamma = sofm_exp_fixed(Q_MULTSR_32X32((int64_t)x, -p->K, 31, 20, 27)); /* Q12.20 */
 	return p->knee_alpha + Q_MULTSR_32X32((int64_t)p->knee_beta, knee_exp_gamma, 24, 20, 24);
 }
 
@@ -66,8 +66,10 @@ static int32_t volume_gain(const struct sof_drc_params *p, int32_t x)
 		 * => y/x = ratio_base * x^(s - 1)
 		 * => y/x = ratio_base * e^(log(x) * (s - 1))
 		 */
-		exp_knee = exp_fixed(Q_MULTSR_32X32((int64_t)drc_log_fixed(Q_SHIFT_RND(x, 31, 26)),
-						    (p->slope - ONE_Q30), 26, 30, 27)); /* Q12.20 */
+		exp_knee = sofm_exp_fixed(Q_MULTSR_32X32((int64_t)
+						drc_log_fixed(Q_SHIFT_RND(x, 31, 26)),
+						(p->slope - ONE_Q30),
+						26, 30, 27)); /* Q12.20 */
 		y = Q_MULTSR_32X32((int64_t)p->ratio_base, exp_knee, 30, 20, 30);
 	}
 
@@ -149,7 +151,7 @@ void drc_update_detector_average(struct drc_state *state,
 						       p->sat_release_frames_inv_neg,
 						       21, 30, 24); /* Q8.24 */
 				sat_release_rate =
-					db2lin_fixed(db_per_frame) - ONE_Q20; /* Q12.20 */
+					sofm_db2lin_fixed(db_per_frame) - ONE_Q20; /* Q12.20 */
 				detector_average += Q_MULTSR_32X32((int64_t)gain_diff,
 								   sat_release_rate, 30, 20, 30);
 			}
@@ -226,7 +228,7 @@ void drc_update_envelope(struct drc_state *state, const struct sof_drc_params *p
 		/* db_per_frame = kSpacingDb / release_frames */
 		db_per_frame = drc_inv_fixed(release_frames, 12, 30); /* Q2.30 */
 		db_per_frame = Q_MULTSR_32X32((int64_t)db_per_frame, p->kSpacingDb, 30, 0, 24);
-		envelope_rate = db2lin_fixed(db_per_frame); /* Q12.20 */
+		envelope_rate = sofm_db2lin_fixed(db_per_frame); /* Q12.20 */
 	} else {
 		int32_t sat32;
 		/* Attack mode - compression_diff_db should be positive dB */
diff --git a/src/audio/drc/drc_hifi3.c b/src/audio/drc/drc_hifi3.c
index 5cb2920fc1d3..73f171fd7ece 100644
--- a/src/audio/drc/drc_hifi3.c
+++ b/src/audio/drc/drc_hifi3.c
@@ -6,7 +6,7 @@
 
 #include <sof/audio/component.h>
 #include <sof/audio/format.h>
-#include <sof/math/decibels.h>
+#include <sof/math/exp_fcn.h>
 #include <sof/math/numbers.h>
 #include <stdint.h>
 
@@ -42,7 +42,7 @@ static int32_t knee_curveK(const struct sof_drc_params *p, int32_t x)
 	 *	 gamma = -k * x
 	 */
 	gamma = drc_mult_lshift(x, -p->K, drc_get_lshift(31, 20, 27));
-	knee_exp_gamma = exp_fixed(gamma);
+	knee_exp_gamma = sofm_exp_fixed(gamma);
 	knee_curve_k = drc_mult_lshift(p->knee_beta, knee_exp_gamma, drc_get_lshift(24, 20, 24));
 	knee_curve_k = AE_ADD32(knee_curve_k, p->knee_alpha);
 	return knee_curve_k;
@@ -78,7 +78,7 @@ static int32_t volume_gain(const struct sof_drc_params *p, int32_t x)
 		tmp = AE_SRAI32R(x, 5); /* Q1.31 -> Q5.26 */
 		tmp = drc_log_fixed(tmp); /* Q6.26 */
 		tmp2 = AE_SUB32(p->slope, ONE_Q30); /* Q2.30 */
-		exp_knee = exp_fixed(drc_mult_lshift(tmp, tmp2, drc_get_lshift(26, 30, 27)));
+		exp_knee = sofm_exp_fixed(drc_mult_lshift(tmp, tmp2, drc_get_lshift(26, 30, 27)));
 		y = drc_mult_lshift(p->ratio_base, exp_knee, drc_get_lshift(30, 20, 30));
 	}
 
@@ -159,7 +159,8 @@ void drc_update_detector_average(struct drc_state *state,
 				db_per_frame = drc_mult_lshift(drc_lin2db_fixed(gain),
 							       p->sat_release_frames_inv_neg,
 							       drc_get_lshift(21, 30, 24));
-				sat_release_rate = AE_SUB32(db2lin_fixed(db_per_frame), ONE_Q20);
+				sat_release_rate = AE_SUB32(sofm_db2lin_fixed(db_per_frame),
+							    ONE_Q20);
 				tmp = drc_mult_lshift(gain_diff, sat_release_rate,
 						      drc_get_lshift(30, 20, 30));
 			}
@@ -254,7 +255,7 @@ void drc_update_envelope(struct drc_state *state, const struct sof_drc_params *p
 		tmp = p->kSpacingDb << 16; /* Q16.16 */
 		lshift = drc_get_lshift(30, 16, 24);
 		db_per_frame = drc_mult_lshift(db_per_frame, tmp, lshift); /* Q8.24 */
-		envelope_rate = db2lin_fixed(db_per_frame); /* Q12.20 */
+		envelope_rate = sofm_db2lin_fixed(db_per_frame); /* Q12.20 */
 	} else {
 		/* Attack mode - compression_diff_db should be positive dB */
 
diff --git a/src/audio/drc/drc_hifi4.c b/src/audio/drc/drc_hifi4.c
index b0c34ea5e225..b414831a30a6 100644
--- a/src/audio/drc/drc_hifi4.c
+++ b/src/audio/drc/drc_hifi4.c
@@ -6,7 +6,7 @@
 
 #include <sof/audio/component.h>
 #include <sof/audio/format.h>
-#include <sof/math/decibels.h>
+#include <sof/math/exp_fcn.h>
 #include <sof/math/numbers.h>
 #include <stdint.h>
 
@@ -65,7 +65,7 @@ static int32_t knee_curveK(const struct sof_drc_params *p, int32_t x)
 	 *	 gamma = -k * x
 	 */
 	gamma = drc_mult_lshift(x, -p->K, LSHIFT_QX31_QY20_QZ27);
-	knee_exp_gamma = exp_fixed(gamma);
+	knee_exp_gamma = sofm_exp_fixed(gamma);
 	knee_curve_k = drc_mult_lshift(p->knee_beta, knee_exp_gamma, LSHIFT_QX24_QY20_QZ24);
 	knee_curve_k = AE_ADD32(knee_curve_k, p->knee_alpha);
 	return knee_curve_k;
@@ -101,7 +101,7 @@ static int32_t volume_gain(const struct sof_drc_params *p, int32_t x)
 		tmp = AE_SRAI32R(x, 5); /* Q1.31 -> Q5.26 */
 		tmp = drc_log_fixed(tmp); /* Q6.26 */
 		tmp2 = AE_SUB32(p->slope, ONE_Q30); /* Q2.30 */
-		exp_knee = exp_fixed(drc_mult_lshift(tmp, tmp2, LSHIFT_QX26_QY30_QZ27));
+		exp_knee = sofm_exp_fixed(drc_mult_lshift(tmp, tmp2, LSHIFT_QX26_QY30_QZ27));
 		y = drc_mult_lshift(p->ratio_base, exp_knee, LSHIFT_QX30_QY20_QZ30);
 	}
 
@@ -185,7 +185,8 @@ void drc_update_detector_average(struct drc_state *state,
 				db_per_frame = drc_mult_lshift(drc_lin2db_fixed(gain),
 							       p->sat_release_frames_inv_neg,
 							       LSHIFT_QX21_QY30_QZ24);
-				sat_release_rate = AE_SUB32(db2lin_fixed(db_per_frame), ONE_Q20);
+				sat_release_rate = AE_SUB32(sofm_db2lin_fixed(db_per_frame),
+							    ONE_Q20);
 				tmp = drc_mult_lshift(gain_diff, sat_release_rate,
 						      LSHIFT_QX30_QY20_QZ30);
 			}
@@ -278,7 +279,7 @@ void drc_update_envelope(struct drc_state *state, const struct sof_drc_params *p
 		tmp = p->kSpacingDb << 16; /* Q16.16 */
 		/* Q8.24 */
 		db_per_frame = drc_mult_lshift(db_per_frame, tmp, LSHIFT_QX30_QY16_QZ24);
-		envelope_rate = db2lin_fixed(db_per_frame); /* Q12.20 */
+		envelope_rate = sofm_db2lin_fixed(db_per_frame); /* Q12.20 */
 	} else {
 		/* Attack mode - compression_diff_db should be positive dB */
 
diff --git a/src/audio/drc/drc_math_generic.c b/src/audio/drc/drc_math_generic.c
index 7ebcae51ab4b..436ee444c7c4 100644
--- a/src/audio/drc/drc_math_generic.c
+++ b/src/audio/drc/drc_math_generic.c
@@ -6,6 +6,7 @@
 
 #include <sof/audio/format.h>
 #include <sof/math/decibels.h>
+#include <sof/math/exp_fcn.h>
 #include <sof/math/numbers.h>
 #include <sof/math/trig.h>
 
@@ -234,7 +235,7 @@ inline int32_t drc_pow_fixed(int32_t x, int32_t y)
 		return 0;
 
 	/* x^y = expf(y * log(x)) */
-	return exp_fixed(q_mult(y, drc_log_fixed(x), 30, 26, 27));
+	return sofm_exp_fixed(q_mult(y, drc_log_fixed(x), 30, 26, 27));
 }
 
 #undef q_multq