diff --git a/src/audio/tdfb/tdfb_hifi3.c b/src/audio/tdfb/tdfb_hifi3.c index 97b5229d02dd..2109431d7993 100644 --- a/src/audio/tdfb/tdfb_hifi3.c +++ b/src/audio/tdfb/tdfb_hifi3.c @@ -38,68 +38,54 @@ void tdfb_fir_s16(struct tdfb_comp_data *cd, struct input_stream_buffer *bsource int in_nch = audio_stream_get_channels(source); int out_nch = audio_stream_get_channels(sink); int emp_ch = 0; - int n, nmax; - int remaining_frames = frames; - const int inc = sizeof(ae_int16); - while (remaining_frames) { - nmax = audio_stream_frames_without_wrap(source, x); - n = MIN(remaining_frames, nmax); - nmax = audio_stream_frames_without_wrap(sink, y); - n = MIN(n, nmax); + for (j = 0; j < (frames >> 1); j++) { + /* Clear output mix*/ + memset(cd->out, 0, 2 * out_nch * sizeof(int32_t)); - for (j = 0; j < n; j += 2) { - /* Clear output mix*/ - memset(cd->out, 0, 2 * out_nch * sizeof(int32_t)); - - /* Read two frames from all input channels - * there won't be buffer overflow since we - * set 2 frames align in tdfb_prepare function. 
- */ - for (i = 0; i < 2 * in_nch; i++) { - AE_L16_XP(d, x, inc); - cd->in[i] = (ae_int32)AE_CVT32X2F16_32(d); - tdfb_direction_copy_emphasis(cd, in_nch, &emp_ch, cd->in[i]); - } - - /* Run and mix all filters to their output channel */ - for (i = 0; i < cfg->num_filters; i++) { - is = cd->input_channel_select[i]; - is2 = is + in_nch; - om = cd->output_channel_mix[i]; - - /* Get filter instance */ - f = &cd->fir[i]; - shift = -f->out_shift; + /* Read two frames from all input channels */ + fir_comp_setup_circular(source); + for (i = 0; i < 2 * in_nch; i++) { + AE_L16_XC(d, x, sizeof(int16_t)); + cd->in[i] = (ae_int32)AE_CVT32X2F16_32(d); + tdfb_direction_copy_emphasis(cd, in_nch, &emp_ch, cd->in[i]); + } - /* Compute FIR and mix as Q5.27*/ - fir_core_setup_circular(f); - fir_32x16_2x_hifi3(f, cd->in[is], cd->in[is2], &y0, &y1, - shift); - for (k = 0; k < out_nch; k++) { - if (om & 1) { - cd->out[k] += (int32_t)y0 >> 4; - cd->out[k + out_nch] += - (int32_t)y1 >> 4; - } - om = om >> 1; + /* Run and mix all filters to their output channel */ + for (i = 0; i < cfg->num_filters; i++) { + is = cd->input_channel_select[i]; + is2 = is + in_nch; + om = cd->output_channel_mix[i]; + + /* Get filter instance */ + f = &cd->fir[i]; + shift = -f->out_shift; + + /* Compute FIR and mix as Q5.27*/ + fir_core_setup_circular(f); + fir_32x16_2x_hifi3(f, cd->in[is], cd->in[is2], &y0, &y1, + shift); + for (k = 0; k < out_nch; k++) { + if (om & 1) { + cd->out[k] += (int32_t)y0 >> 4; + cd->out[k + out_nch] += + (int32_t)y1 >> 4; } + om = om >> 1; } + } - /* Write two frames of output. The values in out[] are shifted - * left and saturated to convert to Q1.27. The values - * are then rounded to 16 bit and converted to Q1.15 for - * sink buffer. TODO: Could saturate four samples with - * one AE_ROUND16X4F32SSYM() instruction. - */ - for (i = 0; i < 2 * out_nch; i++) { - d = AE_ROUND16X4F32SSYM(0, AE_SLAI32S(cd->out[i], 4)); - AE_S16_0_XP(d, y, inc); - } + /* Write two frames of output. 
The values in out[] are shifted + * left and saturated to convert to Q1.27. The values + * are then rounded to 16 bit and converted to Q1.15 for + * sink buffer. TODO: Could saturate four samples with + * one AE_ROUND16X4F32SSYM() instruction. + */ + fir_comp_setup_circular(sink); + for (i = 0; i < 2 * out_nch; i++) { + d = AE_ROUND16X4F32SSYM(0, AE_SLAI32S(cd->out[i], 4)); + AE_S16_0_XC(d, y, sizeof(int16_t)); } - remaining_frames -= n; - x = audio_stream_wrap(source, x); - y = audio_stream_wrap(sink, y); } } #endif @@ -127,68 +113,53 @@ void tdfb_fir_s24(struct tdfb_comp_data *cd, struct input_stream_buffer *bsource int in_nch = audio_stream_get_channels(source); int out_nch = audio_stream_get_channels(sink); int emp_ch = 0; - int n, nmax; - int remaining_frames = frames; - const int inc = sizeof(ae_int32); - - while (remaining_frames) { - nmax = audio_stream_frames_without_wrap(source, x); - n = MIN(remaining_frames, nmax); - nmax = audio_stream_frames_without_wrap(sink, y); - n = MIN(n, nmax); - - for (j = 0; j < n; j += 2) { - /* Clear output mix*/ - memset(cd->out, 0, 2 * out_nch * sizeof(int32_t)); - - /* Read two frames from all input channels - * there won't be buffer overflow since we - * set 2 frames align in tdfb_prepare function. 
- */ - for (i = 0; i < 2 * in_nch; i++) { - AE_L32_XP(d, x, inc); - cd->in[i] = AE_SLAI32(d, 8); - tdfb_direction_copy_emphasis(cd, in_nch, &emp_ch, cd->in[i]); - } - /* Run and mix all filters to their output channel */ - for (i = 0; i < cfg->num_filters; i++) { - is = cd->input_channel_select[i]; - is2 = is + in_nch; - om = cd->output_channel_mix[i]; + for (j = 0; j < (frames >> 1); j++) { + /* Clear output mix*/ + memset(cd->out, 0, 2 * out_nch * sizeof(int32_t)); - /* Get filter instance */ - f = &cd->fir[i]; - shift = -f->out_shift; + /* Read two frames from all input channels */ + fir_comp_setup_circular(source); + for (i = 0; i < 2 * in_nch; i++) { + AE_L32_XC(d, x, sizeof(int32_t)); + cd->in[i] = AE_SLAI32(d, 8); + tdfb_direction_copy_emphasis(cd, in_nch, &emp_ch, cd->in[i]); + } - /* Compute FIR and mix as Q5.27*/ - fir_core_setup_circular(f); - fir_32x16_2x_hifi3(f, cd->in[is], cd->in[is2], &y0, &y1, - shift); - for (k = 0; k < out_nch; k++) { - if (om & 1) { - cd->out[k] += (int32_t)y0 >> 4; - cd->out[k + out_nch] += - (int32_t)y1 >> 4; - } - om = om >> 1; + for (i = 0; i < cfg->num_filters; i++) { + is = cd->input_channel_select[i]; + is2 = is + in_nch; + om = cd->output_channel_mix[i]; + + /* Get filter instance */ + f = &cd->fir[i]; + shift = -f->out_shift; + + /* Compute FIR and mix as Q5.27*/ + fir_core_setup_circular(f); + fir_32x16_2x_hifi3(f, cd->in[is], cd->in[is2], &y0, &y1, + shift); + for (k = 0; k < out_nch; k++) { + if (om & 1) { + cd->out[k] += (int32_t)y0 >> 4; + cd->out[k + out_nch] += + (int32_t)y1 >> 4; } + om = om >> 1; } + } - /* Write two frames of output. The values in out[] are shifted - * left and saturated to convert to Q1.27. The values - * are then rounded to 16 bit and converted to Q1.15 for - * sink buffer. TODO: Could saturate four samples with - * one AE_ROUND16X4F32SSYM() instruction. 
- */ - for (i = 0; i < 2 * out_nch; i++) { - d = AE_SRAI32(AE_SLAI32S(AE_SRAI32R(cd->out[i], 4), 8), 8); - AE_S32_L_XP(d, y, inc); - } + /* Write two frames of output. The values from out[] are first + * rounded to Q5.23 format, then saturated to Q1.23, and + * shifted by 8 to LSB side of the word before storing to sink. + * TODO: Could shift etc. two samples simultaneously. + */ + fir_comp_setup_circular(sink); + for (i = 0; i < 2 * out_nch; i++) { + d = AE_SRAI32(AE_SLAI32S(AE_SRAI32R(cd->out[i], 4), 8), + 8); + AE_S32_L_XC(d, y, sizeof(int32_t)); } - remaining_frames -= n; - x = audio_stream_wrap(source, x); - y = audio_stream_wrap(sink, y); } } #endif @@ -216,66 +187,52 @@ void tdfb_fir_s32(struct tdfb_comp_data *cd, struct input_stream_buffer *bsource int in_nch = audio_stream_get_channels(source); int out_nch = audio_stream_get_channels(sink); int emp_ch = 0; - int n, nmax; - int remaining_frames = frames; - const int inc = sizeof(ae_int32); - - while (remaining_frames) { - nmax = audio_stream_frames_without_wrap(source, x); - n = MIN(remaining_frames, nmax); - nmax = audio_stream_frames_without_wrap(sink, y); - n = MIN(n, nmax); - - for (j = 0; j < n; j += 2) { - /* Clear output mix*/ - memset(cd->out, 0, 2 * out_nch * sizeof(int32_t)); - /* Read two frames from all input channels - * there won't be buffer overflow since we - * set 2 frames align in tdfb_prepare function. 
- */ - for (i = 0; i < 2 * in_nch; i++) { - AE_L32_XC(d, x, inc); - cd->in[i] = d; - tdfb_direction_copy_emphasis(cd, in_nch, &emp_ch, cd->in[i]); - } - - for (i = 0; i < cfg->num_filters; i++) { - is = cd->input_channel_select[i]; - is2 = is + in_nch; - om = cd->output_channel_mix[i]; + for (j = 0; j < (frames >> 1); j++) { + /* Clear output mix*/ + memset(cd->out, 0, 2 * out_nch * sizeof(int32_t)); - /* Get filter instance */ - f = &cd->fir[i]; - shift = -f->out_shift; + /* Read two frames from all input channels */ + fir_comp_setup_circular(source); + for (i = 0; i < 2 * in_nch; i++) { + AE_L32_XC(d, x, sizeof(int32_t)); + cd->in[i] = d; + tdfb_direction_copy_emphasis(cd, in_nch, &emp_ch, cd->in[i]); + } - /* Compute FIR and mix as Q5.27*/ - fir_core_setup_circular(f); - fir_32x16_2x_hifi3(f, cd->in[is], cd->in[is2], &y0, &y1, - shift); - for (k = 0; k < out_nch; k++) { - if (om & 1) { - cd->out[k] += (int32_t)y0 >> 4; - cd->out[k + out_nch] += - (int32_t)y1 >> 4; - } - om = om >> 1; + for (i = 0; i < cfg->num_filters; i++) { + is = cd->input_channel_select[i]; + is2 = is + in_nch; + om = cd->output_channel_mix[i]; + + /* Get filter instance */ + f = &cd->fir[i]; + shift = -f->out_shift; + + /* Compute FIR and mix as Q5.27*/ + fir_core_setup_circular(f); + fir_32x16_2x_hifi3(f, cd->in[is], cd->in[is2], &y0, &y1, + shift); + for (k = 0; k < out_nch; k++) { + if (om & 1) { + cd->out[k] += (int32_t)y0 >> 4; + cd->out[k + out_nch] += + (int32_t)y1 >> 4; } + om = om >> 1; } + } - /* Write two frames of output. In Q5.27 to Q1.31 conversion - * rounding is not applicable so just shift left by 4 and - * saturate. TODO: Could shift two samples with one - * instruction. - */ - for (i = 0; i < 2 * out_nch; i++) { - d = AE_SLAI32S(cd->out[i], 4); - AE_S32_L_XP(d, y, inc); - } + /* Write two frames of output. In Q5.27 to Q1.31 conversion + * rounding is not applicable so just shift left by 4 and + * saturate. TODO: Could shift two samples with one + * instruction. 
+ */ + fir_comp_setup_circular(sink); + for (i = 0; i < 2 * out_nch; i++) { + d = AE_SLAI32S(cd->out[i], 4); + AE_S32_L_XC(d, y, sizeof(int32_t)); } - remaining_frames -= n; - x = audio_stream_wrap(source, x); - y = audio_stream_wrap(sink, y); } } #endif diff --git a/src/include/sof/audio/tdfb/tdfb_comp.h b/src/include/sof/audio/tdfb/tdfb_comp.h index ce48760e22a6..d26403b5c015 100644 --- a/src/include/sof/audio/tdfb/tdfb_comp.h +++ b/src/include/sof/audio/tdfb/tdfb_comp.h @@ -24,7 +24,7 @@ #define TDFB_GENERIC 0 #define TDFB_HIFIEP 1 #define TDFB_HIFI3 0 -#elif XCHAL_HAVE_HIFI3 == 1 || XCHAL_HAVE_HIFI4 == 1 +#elif XCHAL_HAVE_HIFI3 == 1 #define TDFB_HIFI3 1 #define TDFB_HIFIEP 0 #define TDFB_GENERIC 0