Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
236 changes: 112 additions & 124 deletions src/audio/eq_fir/eq_fir_hifi3.c
Original file line number Diff line number Diff line change
Expand Up @@ -33,55 +33,49 @@ void eq_fir_2x_s32(struct fir_state_32x16 fir[], struct input_stream_buffer *bso
ae_int32x2 d0 = 0;
ae_int32x2 d1 = 0;
ae_int32 *src = audio_stream_get_rptr(source);
ae_int32 *snk = audio_stream_get_wptr(sink);
ae_int32 *dst = audio_stream_get_wptr(sink);
ae_int32 *x;
ae_int32 *y0;
ae_int32 *y1;
int ch;
int i;
int i, n, nmax;
int rshift;
int lshift;
int shift;
int nch = audio_stream_get_channels(source);
int inc_nch_s = nch * sizeof(int32_t);
int inc_2nch_s = 2 * inc_nch_s;

for (ch = 0; ch < nch; ch++) {
/* Get FIR instance and get shifts.
*/
f = &fir[ch];
fir_get_lrshifts(f, &lshift, &rshift);
shift = lshift - rshift;

/* Copy src to x and advance src with dummy load */
fir_comp_setup_circular(source);
x = src;
AE_L32_XC(d0, src, sizeof(int32_t));

/* Copy snk to y0 and advance snk with dummy load. Pointer
* y1 is set to be ahead of y0 with one frame.
*/
fir_comp_setup_circular(sink);
y0 = snk;
y1 = snk;
AE_L32_XC(d0, snk, sizeof(int32_t));
AE_L32_XC(d1, y1, inc_nch_s);

for (i = 0; i < (frames >> 1); i++) {
/* Load two input samples via input pointer x */
fir_comp_setup_circular(source);
AE_L32_XC(d0, x, inc_nch_s);
AE_L32_XC(d1, x, inc_nch_s);

/* Compute FIR */
int samples = nch * frames;

while (samples) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm worried about the change in processing order. A run on a real device with a long FIR would give more confidence that this is better. Maybe the xt testbench could also be enough, because it has the pipelines and scheduler framework from SOF. You could add a test response where the left and right channels have different coefficients, and make them long. See the FIR EQ examples generator for how it's done.

nmax = audio_stream_samples_without_wrap_s32(sink, dst);
n = MIN(nmax, samples);
nmax = audio_stream_samples_without_wrap_s32(source, src);
n = MIN(n, nmax);
for (ch = 0; ch < nch; ch++) {
/* Get FIR instance and get shifts.*/
f = &fir[ch];
fir_get_lrshifts(f, &lshift, &rshift);
shift = lshift - rshift;
/* set f->delay as circular buffer */
fir_core_setup_circular(f);
fir_32x16_2x_hifi3(f, d0, d1, y0, y1, shift);

/* Update output pointers y0 and y1 with dummy loads */
fir_comp_setup_circular(sink);
AE_L32_XC(d0, y0, inc_2nch_s);
AE_L32_XC(d1, y1, inc_2nch_s);
x = src + ch;
y0 = dst + ch;
y1 = y0 + nch;

for (i = 0; i < (n >> 1); i += nch) {
/* Load two input samples via input pointer x */
AE_L32_XP(d0, x, inc_nch_s);
AE_L32_XP(d1, x, inc_nch_s);
fir_32x16_2x_hifi3(f, d0, d1, y0, y1, shift);
AE_L32_XC(d0, y0, inc_2nch_s);
AE_L32_XC(d1, y1, inc_2nch_s);
}
}
samples -= n;
dst = audio_stream_wrap(sink, dst + n);
src = audio_stream_wrap(source, src + n);
}
}
#endif /* CONFIG_FORMAT_S32LE */
Expand All @@ -98,64 +92,62 @@ void eq_fir_2x_s24(struct fir_state_32x16 fir[], struct input_stream_buffer *bso
ae_int32 z0;
ae_int32 z1;
ae_int32 *src = audio_stream_get_rptr(source);
ae_int32 *snk = audio_stream_get_wptr(sink);
ae_int32 *dst = audio_stream_get_wptr(sink);
ae_int32 *x;
ae_int32 *y;
int ch;
int i;
int i, n, nmax;
int rshift;
int lshift;
int shift;
int nch = audio_stream_get_channels(source);
int inc_nch_s = nch * sizeof(int32_t);

for (ch = 0; ch < nch; ch++) {
/* Get FIR instance and get shifts.
*/
f = &fir[ch];
fir_get_lrshifts(f, &lshift, &rshift);
shift = lshift - rshift;

/* Copy src to x and advance src with dummy load */
fir_comp_setup_circular(source);
x = src;
AE_L32_XC(d0, src, sizeof(int32_t));

/* Copy snk to y0 and advance snk with dummy load. Pointer
* y1 is set to be ahead of y0 with one frame.
*/
fir_comp_setup_circular(sink);
y = snk;
AE_L32_XC(d0, snk, sizeof(int32_t));

for (i = 0; i < (frames >> 1); i++) {
/* Load two input samples via input pointer x */
fir_comp_setup_circular(source);
AE_L32_XC(d0, x, inc_nch_s);
AE_L32_XC(d1, x, inc_nch_s);

/* Convert Q1.23 to Q1.31 compatible format */
d0 = AE_SLAA32(d0, 8);
d1 = AE_SLAA32(d1, 8);

/* Compute FIR */
int samples = nch * frames;

while (samples) {
nmax = audio_stream_samples_without_wrap_s24(sink, dst);
n = MIN(nmax, samples);
nmax = audio_stream_samples_without_wrap_s24(source, src);
n = MIN(n, nmax);
for (ch = 0; ch < nch; ch++) {
/* Get FIR instance and get shifts.*/
f = &fir[ch];
fir_get_lrshifts(f, &lshift, &rshift);
shift = lshift - rshift;
/* set f->delay as circular buffer */
fir_core_setup_circular(f);
fir_32x16_2x_hifi3(f, d0, d1, &z0, &z1, shift);

/* Shift and round to Q1.23 format */
d0 = AE_SRAI32R(z0, 8);
d0 = AE_SLAI32S(d0, 8);
d0 = AE_SRAI32(d0, 8);
x = src + ch;
y = dst + ch;

for (i = 0; i < (n >> 1); i += nch) {
/* Load two input samples via input pointer x */
AE_L32_XP(d0, x, inc_nch_s);
AE_L32_XP(d1, x, inc_nch_s);

/* Convert Q1.23 to Q1.31 compatible format */
d0 = AE_SLAA32(d0, 8);
d1 = AE_SLAA32(d1, 8);

fir_32x16_2x_hifi3(f, d0, d1, &z0, &z1, shift);

/* Shift and round to Q1.23 format */
d0 = AE_SRAI32R(z0, 8);
d0 = AE_SLAI32S(d0, 8);
d0 = AE_SRAI32(d0, 8);

d1 = AE_SRAI32R(z1, 8);
d1 = AE_SLAI32S(d1, 8);
d1 = AE_SRAI32(d1, 8);
d1 = AE_SRAI32R(z1, 8);
d1 = AE_SLAI32S(d1, 8);
d1 = AE_SRAI32(d1, 8);

/* Store output and update output pointers */
fir_comp_setup_circular(sink);
AE_S32_L_XC(d0, y, inc_nch_s);
AE_S32_L_XC(d1, y, inc_nch_s);
/* Store output and update output pointers */
AE_S32_L_XC(d0, y, inc_nch_s);
AE_S32_L_XC(d1, y, inc_nch_s);
}
}
samples -= n;
dst = audio_stream_wrap(sink, dst + n);
src = audio_stream_wrap(source, src + n);
}
}
#endif /* CONFIG_FORMAT_S24LE */
Expand All @@ -174,61 +166,57 @@ void eq_fir_2x_s16(struct fir_state_32x16 fir[], struct input_stream_buffer *bso
ae_int32 x0;
ae_int32 x1;
ae_int16 *src = audio_stream_get_rptr(source);
ae_int16 *snk = audio_stream_get_wptr(sink);
ae_int16 *dst = audio_stream_get_wptr(sink);
ae_int16 *x;
ae_int16 *y;
int ch;
int i;
int i, n, nmax;
int rshift;
int lshift;
int shift;
int nch = audio_stream_get_channels(source);
int inc_nch_s = nch * sizeof(int16_t);

for (ch = 0; ch < nch; ch++) {
/* Get FIR instance and get shifts.
*/
f = &fir[ch];
fir_get_lrshifts(f, &lshift, &rshift);
shift = lshift - rshift;

/* Copy src to x and advance src to next channel with
* dummy load.
*/
fir_comp_setup_circular(source);
x = src;
AE_L16_XC(d0, src, sizeof(int16_t));

/* Copy pointer snk to y0 and advance snk with dummy load.
* Pointer y1 is set to be ahead of y0 with one frame.
*/
fir_comp_setup_circular(sink);
y = snk;
AE_L16_XC(d0, snk, sizeof(int16_t));

for (i = 0; i < (frames >> 1); i++) {
/* Load two input samples via input pointer x */
fir_comp_setup_circular(source);
AE_L16_XC(d0, x, inc_nch_s);
AE_L16_XC(d1, x, inc_nch_s);

/* Convert Q1.15 to Q1.31 compatible format */
x0 = AE_CVT32X2F16_32(d0);
x1 = AE_CVT32X2F16_32(d1);

/* Compute FIR */
int samples = nch * frames;

while (samples) {
nmax = audio_stream_samples_without_wrap_s16(sink, dst);
n = MIN(nmax, samples);
nmax = audio_stream_samples_without_wrap_s16(source, src);
n = MIN(n, nmax);
for (ch = 0; ch < nch; ch++) {
/* Get FIR instance and get shifts.*/
f = &fir[ch];
fir_get_lrshifts(f, &lshift, &rshift);
shift = lshift - rshift;
/* set f->delay as circular buffer */
fir_core_setup_circular(f);
fir_32x16_2x_hifi3(f, x0, x1, &z0, &z1, shift);

/* Round to Q1.15 format */
d0 = AE_ROUND16X4F32SSYM(z0, z0);
d1 = AE_ROUND16X4F32SSYM(z1, z1);
x = src + ch;
y = dst + ch;

for (i = 0; i < (n >> 1); i += nch) {
/* Load two input samples via input pointer x */
AE_L16_XP(d0, x, inc_nch_s);
AE_L16_XP(d1, x, inc_nch_s);

/* Convert Q1.15 to Q1.31 compatible format */
x0 = AE_CVT32X2F16_32(d0);
x1 = AE_CVT32X2F16_32(d1);

fir_32x16_2x_hifi3(f, x0, x1, &z0, &z1, shift);

/* Round to Q1.15 format */
d0 = AE_ROUND16X4F32SSYM(z0, z0);
d1 = AE_ROUND16X4F32SSYM(z1, z1);

/* Store output and update output pointers */
fir_comp_setup_circular(sink);
AE_S16_0_XC(d0, y, inc_nch_s);
AE_S16_0_XC(d1, y, inc_nch_s);
/* Store output and update output pointers */
AE_S16_0_XC(d0, y, inc_nch_s);
AE_S16_0_XC(d1, y, inc_nch_s);
}
}
samples -= n;
dst = audio_stream_wrap(sink, dst + n);
src = audio_stream_wrap(source, src + n);
}
}
#endif /* CONFIG_FORMAT_S16LE */
Expand Down
2 changes: 1 addition & 1 deletion src/include/sof/math/fir_config.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
#if XCHAL_HAVE_HIFI2EP == 1
#define FIR_HIFIEP 1
#define FIR_HIFI3 0
#elif XCHAL_HAVE_HIFI3 == 1
#elif XCHAL_HAVE_HIFI3 == 1 || XCHAL_HAVE_HIFI4 == 1
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch! I didn't realize that the HIFI4 build does not have HIFI3 set, even though they are mostly compatible. I wonder if there are other similar issues in the code?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually, in our config HIFI3 is enabled in the HIFI4 build. I added `|| XCHAL_HAVE_HIFI4 == 1` because I'm not sure whether HIFI3 will still be enabled when we have HIFI5 or another newer version (where HIFI4 might still be enabled). I can check the other code.

#define FIR_HIFI3 1
#define FIR_HIFIEP 0
#else
Expand Down