From 165aae6e362212f9476863f78f3a70a0ca23c076 Mon Sep 17 00:00:00 2001 From: Andrula Song Date: Mon, 24 Apr 2023 11:16:10 +0800 Subject: [PATCH] Audio: EQ-FIR: Optimize the eq_fir_2x_sx functions Optimize the eq_fir_2x_sx functions by removing the duplicated usage of the circular buffer. Signed-off-by: Andrula Song --- src/audio/eq_fir/eq_fir_hifi3.c | 236 ++++++++++++++---------------- src/include/sof/math/fir_config.h | 2 +- 2 files changed, 113 insertions(+), 125 deletions(-) diff --git a/src/audio/eq_fir/eq_fir_hifi3.c b/src/audio/eq_fir/eq_fir_hifi3.c index 317732c07b0a..b9b5f3b1ec63 100644 --- a/src/audio/eq_fir/eq_fir_hifi3.c +++ b/src/audio/eq_fir/eq_fir_hifi3.c @@ -33,55 +33,49 @@ void eq_fir_2x_s32(struct fir_state_32x16 fir[], struct input_stream_buffer *bso ae_int32x2 d0 = 0; ae_int32x2 d1 = 0; ae_int32 *src = audio_stream_get_rptr(source); - ae_int32 *snk = audio_stream_get_wptr(sink); + ae_int32 *dst = audio_stream_get_wptr(sink); ae_int32 *x; ae_int32 *y0; ae_int32 *y1; int ch; - int i; + int i, n, nmax; int rshift; int lshift; int shift; int nch = audio_stream_get_channels(source); int inc_nch_s = nch * sizeof(int32_t); int inc_2nch_s = 2 * inc_nch_s; - - for (ch = 0; ch < nch; ch++) { - /* Get FIR instance and get shifts. - */ - f = &fir[ch]; - fir_get_lrshifts(f, &lshift, &rshift); - shift = lshift - rshift; - - /* Copy src to x and advance src with dummy load */ - fir_comp_setup_circular(source); - x = src; - AE_L32_XC(d0, src, sizeof(int32_t)); - - /* Copy snk to y0 and advance snk with dummy load. Pointer - * y1 is set to be ahead of y0 with one frame. 
- */ - fir_comp_setup_circular(sink); - y0 = snk; - y1 = snk; - AE_L32_XC(d0, snk, sizeof(int32_t)); - AE_L32_XC(d1, y1, inc_nch_s); - - for (i = 0; i < (frames >> 1); i++) { - /* Load two input samples via input pointer x */ - fir_comp_setup_circular(source); - AE_L32_XC(d0, x, inc_nch_s); - AE_L32_XC(d1, x, inc_nch_s); - - /* Compute FIR */ + int samples = nch * frames; + + while (samples) { + nmax = audio_stream_samples_without_wrap_s32(sink, dst); + n = MIN(nmax, samples); + nmax = audio_stream_samples_without_wrap_s32(source, src); + n = MIN(n, nmax); + for (ch = 0; ch < nch; ch++) { + /* Get FIR instance and get shifts.*/ + f = &fir[ch]; + fir_get_lrshifts(f, &lshift, &rshift); + shift = lshift - rshift; + /* set f->delay as circular buffer */ fir_core_setup_circular(f); - fir_32x16_2x_hifi3(f, d0, d1, y0, y1, shift); - /* Update output pointers y0 and y1 with dummy loads */ - fir_comp_setup_circular(sink); - AE_L32_XC(d0, y0, inc_2nch_s); - AE_L32_XC(d1, y1, inc_2nch_s); + x = src + ch; + y0 = dst + ch; + y1 = y0 + nch; + + for (i = 0; i < (n >> 1); i += nch) { + /* Load two input samples via input pointer x */ + AE_L32_XP(d0, x, inc_nch_s); + AE_L32_XP(d1, x, inc_nch_s); + fir_32x16_2x_hifi3(f, d0, d1, y0, y1, shift); + AE_L32_XC(d0, y0, inc_2nch_s); + AE_L32_XC(d1, y1, inc_2nch_s); + } } + samples -= n; + dst = audio_stream_wrap(sink, dst + n); + src = audio_stream_wrap(source, src + n); } } #endif /* CONFIG_FORMAT_S32LE */ @@ -98,64 +92,62 @@ void eq_fir_2x_s24(struct fir_state_32x16 fir[], struct input_stream_buffer *bso ae_int32 z0; ae_int32 z1; ae_int32 *src = audio_stream_get_rptr(source); - ae_int32 *snk = audio_stream_get_wptr(sink); + ae_int32 *dst = audio_stream_get_wptr(sink); ae_int32 *x; ae_int32 *y; int ch; - int i; + int i, n, nmax; int rshift; int lshift; int shift; int nch = audio_stream_get_channels(source); int inc_nch_s = nch * sizeof(int32_t); - - for (ch = 0; ch < nch; ch++) { - /* Get FIR instance and get shifts. 
- */ - f = &fir[ch]; - fir_get_lrshifts(f, &lshift, &rshift); - shift = lshift - rshift; - - /* Copy src to x and advance src with dummy load */ - fir_comp_setup_circular(source); - x = src; - AE_L32_XC(d0, src, sizeof(int32_t)); - - /* Copy snk to y0 and advance snk with dummy load. Pointer - * y1 is set to be ahead of y0 with one frame. - */ - fir_comp_setup_circular(sink); - y = snk; - AE_L32_XC(d0, snk, sizeof(int32_t)); - - for (i = 0; i < (frames >> 1); i++) { - /* Load two input samples via input pointer x */ - fir_comp_setup_circular(source); - AE_L32_XC(d0, x, inc_nch_s); - AE_L32_XC(d1, x, inc_nch_s); - - /* Convert Q1.23 to Q1.31 compatible format */ - d0 = AE_SLAA32(d0, 8); - d1 = AE_SLAA32(d1, 8); - - /* Compute FIR */ + int samples = nch * frames; + + while (samples) { + nmax = audio_stream_samples_without_wrap_s24(sink, dst); + n = MIN(nmax, samples); + nmax = audio_stream_samples_without_wrap_s24(source, src); + n = MIN(n, nmax); + for (ch = 0; ch < nch; ch++) { + /* Get FIR instance and get shifts.*/ + f = &fir[ch]; + fir_get_lrshifts(f, &lshift, &rshift); + shift = lshift - rshift; + /* set f->delay as circular buffer */ fir_core_setup_circular(f); - fir_32x16_2x_hifi3(f, d0, d1, &z0, &z1, shift); - /* Shift and round to Q1.23 format */ - d0 = AE_SRAI32R(z0, 8); - d0 = AE_SLAI32S(d0, 8); - d0 = AE_SRAI32(d0, 8); + x = src + ch; + y = dst + ch; + + for (i = 0; i < (n >> 1); i += nch) { + /* Load two input samples via input pointer x */ + AE_L32_XP(d0, x, inc_nch_s); + AE_L32_XP(d1, x, inc_nch_s); + + /* Convert Q1.23 to Q1.31 compatible format */ + d0 = AE_SLAA32(d0, 8); + d1 = AE_SLAA32(d1, 8); + + fir_32x16_2x_hifi3(f, d0, d1, &z0, &z1, shift); + + /* Shift and round to Q1.23 format */ + d0 = AE_SRAI32R(z0, 8); + d0 = AE_SLAI32S(d0, 8); + d0 = AE_SRAI32(d0, 8); - d1 = AE_SRAI32R(z1, 8); - d1 = AE_SLAI32S(d1, 8); - d1 = AE_SRAI32(d1, 8); + d1 = AE_SRAI32R(z1, 8); + d1 = AE_SLAI32S(d1, 8); + d1 = AE_SRAI32(d1, 8); - /* Store output and update 
output pointers */ - fir_comp_setup_circular(sink); - AE_S32_L_XC(d0, y, inc_nch_s); - AE_S32_L_XC(d1, y, inc_nch_s); + /* Store output and update output pointers */ + AE_S32_L_XC(d0, y, inc_nch_s); + AE_S32_L_XC(d1, y, inc_nch_s); + } } + samples -= n; + dst = audio_stream_wrap(sink, dst + n); + src = audio_stream_wrap(source, src + n); } } #endif /* CONFIG_FORMAT_S24LE */ @@ -174,61 +166,57 @@ void eq_fir_2x_s16(struct fir_state_32x16 fir[], struct input_stream_buffer *bso ae_int32 x0; ae_int32 x1; ae_int16 *src = audio_stream_get_rptr(source); - ae_int16 *snk = audio_stream_get_wptr(sink); + ae_int16 *dst = audio_stream_get_wptr(sink); ae_int16 *x; ae_int16 *y; int ch; - int i; + int i, n, nmax; int rshift; int lshift; int shift; int nch = audio_stream_get_channels(source); int inc_nch_s = nch * sizeof(int16_t); - - for (ch = 0; ch < nch; ch++) { - /* Get FIR instance and get shifts. - */ - f = &fir[ch]; - fir_get_lrshifts(f, &lshift, &rshift); - shift = lshift - rshift; - - /* Copy src to x and advance src to next channel with - * dummy load. - */ - fir_comp_setup_circular(source); - x = src; - AE_L16_XC(d0, src, sizeof(int16_t)); - - /* Copy pointer snk to y0 and advance snk with dummy load. - * Pointer y1 is set to be ahead of y0 with one frame. 
- */ - fir_comp_setup_circular(sink); - y = snk; - AE_L16_XC(d0, snk, sizeof(int16_t)); - - for (i = 0; i < (frames >> 1); i++) { - /* Load two input samples via input pointer x */ - fir_comp_setup_circular(source); - AE_L16_XC(d0, x, inc_nch_s); - AE_L16_XC(d1, x, inc_nch_s); - - /* Convert Q1.15 to Q1.31 compatible format */ - x0 = AE_CVT32X2F16_32(d0); - x1 = AE_CVT32X2F16_32(d1); - - /* Compute FIR */ + int samples = nch * frames; + + while (samples) { + nmax = audio_stream_samples_without_wrap_s16(sink, dst); + n = MIN(nmax, samples); + nmax = audio_stream_samples_without_wrap_s16(source, src); + n = MIN(n, nmax); + for (ch = 0; ch < nch; ch++) { + /* Get FIR instance and get shifts.*/ + f = &fir[ch]; + fir_get_lrshifts(f, &lshift, &rshift); + shift = lshift - rshift; + /* set f->delay as circular buffer */ fir_core_setup_circular(f); - fir_32x16_2x_hifi3(f, x0, x1, &z0, &z1, shift); - /* Round to Q1.15 format */ - d0 = AE_ROUND16X4F32SSYM(z0, z0); - d1 = AE_ROUND16X4F32SSYM(z1, z1); + x = src + ch; + y = dst + ch; + + for (i = 0; i < (n >> 1); i += nch) { + /* Load two input samples via input pointer x */ + AE_L16_XP(d0, x, inc_nch_s); + AE_L16_XP(d1, x, inc_nch_s); + + /* Convert Q1.15 to Q1.31 compatible format */ + x0 = AE_CVT32X2F16_32(d0); + x1 = AE_CVT32X2F16_32(d1); + + fir_32x16_2x_hifi3(f, x0, x1, &z0, &z1, shift); + + /* Round to Q1.15 format */ + d0 = AE_ROUND16X4F32SSYM(z0, z0); + d1 = AE_ROUND16X4F32SSYM(z1, z1); - /* Store output and update output pointers */ - fir_comp_setup_circular(sink); - AE_S16_0_XC(d0, y, inc_nch_s); - AE_S16_0_XC(d1, y, inc_nch_s); + /* Store output and update output pointers */ + AE_S16_0_XC(d0, y, inc_nch_s); + AE_S16_0_XC(d1, y, inc_nch_s); + } } + samples -= n; + dst = audio_stream_wrap(sink, dst + n); + src = audio_stream_wrap(source, src + n); } } #endif /* CONFIG_FORMAT_S16LE */ diff --git a/src/include/sof/math/fir_config.h b/src/include/sof/math/fir_config.h index c40f487c6085..e181c602e00f 100644 --- 
a/src/include/sof/math/fir_config.h +++ b/src/include/sof/math/fir_config.h @@ -44,7 +44,7 @@ #if XCHAL_HAVE_HIFI2EP == 1 #define FIR_HIFIEP 1 #define FIR_HIFI3 0 -#elif XCHAL_HAVE_HIFI3 == 1 +#elif XCHAL_HAVE_HIFI3 == 1 || XCHAL_HAVE_HIFI4 == 1 #define FIR_HIFI3 1 #define FIR_HIFIEP 0 #else