thesofproject · fredoh9 · Jun 27, 2023 · Jun 27, 2023
@@ -38,68 +38,54 @@ void tdfb_fir_s16(struct tdfb_comp_data *cd, struct input_stream_buffer *bsource
 	int in_nch = audio_stream_get_channels(source);
 	int out_nch = audio_stream_get_channels(sink);
 	int emp_ch = 0;
-	int n, nmax;
-	int remaining_frames = frames;
-	const int inc = sizeof(ae_int16);
 
-	while (remaining_frames) {
-		nmax = audio_stream_frames_without_wrap(source, x);
-		n = MIN(remaining_frames, nmax);
-		nmax = audio_stream_frames_without_wrap(sink, y);
-		n = MIN(n, nmax);
+	for (j = 0; j < (frames >> 1); j++) {
+		/* Clear output mix*/
+		memset(cd->out, 0,  2 * out_nch * sizeof(int32_t));
 
-		for (j = 0; j < n; j += 2) {
-			/* Clear output mix*/
-			memset(cd->out, 0,  2 * out_nch * sizeof(int32_t));
-
-			/* Read two frames from all input channels
-			 * there won't be buffer overflow since we
-			 * set 2 frames align in tdfb_prepare function.
-			 */
-			for (i = 0; i < 2 * in_nch; i++) {
-				AE_L16_XP(d, x, inc);
-				cd->in[i] = (ae_int32)AE_CVT32X2F16_32(d);
-				tdfb_direction_copy_emphasis(cd, in_nch, &emp_ch, cd->in[i]);
-			}
-
-			/* Run and mix all filters to their output channel */
-			for (i = 0; i < cfg->num_filters; i++) {
-				is = cd->input_channel_select[i];
-				is2 = is + in_nch;
-				om = cd->output_channel_mix[i];
-
-				/* Get filter instance */
-				f = &cd->fir[i];
-				shift = -f->out_shift;
+		/* Read two frames from all input channels */
+		fir_comp_setup_circular(source);
+		for (i = 0; i < 2 * in_nch; i++) {
+			AE_L16_XC(d, x, sizeof(int16_t));
+			cd->in[i] = (ae_int32)AE_CVT32X2F16_32(d);
+			tdfb_direction_copy_emphasis(cd, in_nch, &emp_ch, cd->in[i]);
+		}
 
-				/* Compute FIR and mix as Q5.27*/
-				fir_core_setup_circular(f);
-				fir_32x16_2x_hifi3(f, cd->in[is], cd->in[is2], &y0, &y1,
-						   shift);
-				for (k = 0; k < out_nch; k++) {
-					if (om & 1) {
-						cd->out[k] += (int32_t)y0 >> 4;
-						cd->out[k + out_nch] +=
-							(int32_t)y1 >> 4;
-					}
-					om = om >> 1;
+		/* Run and mix all filters to their output channel */
+		for (i = 0; i < cfg->num_filters; i++) {
+			is = cd->input_channel_select[i];
+			is2 = is + in_nch;
+			om = cd->output_channel_mix[i];
+
+			/* Get filter instance */
+			f = &cd->fir[i];
+			shift = -f->out_shift;
+
+			/* Compute FIR and mix as Q5.27*/
+			fir_core_setup_circular(f);
+			fir_32x16_2x_hifi3(f, cd->in[is], cd->in[is2], &y0, &y1,
+					   shift);
+			for (k = 0; k < out_nch; k++) {
+				if (om & 1) {
+					cd->out[k] += (int32_t)y0 >> 4;
+					cd->out[k + out_nch] +=
+						(int32_t)y1 >> 4;
 				}
+				om = om >> 1;
 			}
+		}
 
-			/* Write two frames of output. The values in out[] are shifted
-			 * left and saturated to convert to Q1.27. The values
-			 * are then rounded to 16 bit and converted to Q1.15 for
-			 * sink buffer. TODO: Could saturate four samples with
-			 * one AE_ROUND16X4F32SSYM() instruction.
-			 */
-			for (i = 0; i < 2 * out_nch; i++) {
-				d = AE_ROUND16X4F32SSYM(0, AE_SLAI32S(cd->out[i], 4));
-				AE_S16_0_XP(d, y, inc);
-			}
+		/* Write two frames of output. The values in out[] are shifted
+		 * left and saturated to convert to Q1.27. The values
+		 * are then rounded to 16 bit and converted to Q1.15 for
+		 * sink buffer. TODO: Could saturate four samples with
+		 * one AE_ROUND16X4F32SSYM() instruction.
+		 */
+		fir_comp_setup_circular(sink);
+		for (i = 0; i < 2 * out_nch; i++) {
+			d = AE_ROUND16X4F32SSYM(0, AE_SLAI32S(cd->out[i], 4));
+			AE_S16_0_XC(d, y, sizeof(int16_t));
 		}
-		remaining_frames -= n;
-		x = audio_stream_wrap(source, x);
-		y = audio_stream_wrap(sink, y);
 	}
 }
 #endif
@@ -127,68 +113,53 @@ void tdfb_fir_s24(struct tdfb_comp_data *cd, struct input_stream_buffer *bsource
 	int in_nch = audio_stream_get_channels(source);
 	int out_nch = audio_stream_get_channels(sink);
 	int emp_ch = 0;
-	int n, nmax;
-	int remaining_frames = frames;
-	const int inc = sizeof(ae_int32);
-
-	while (remaining_frames) {
-		nmax = audio_stream_frames_without_wrap(source, x);
-		n = MIN(remaining_frames, nmax);
-		nmax = audio_stream_frames_without_wrap(sink, y);
-		n = MIN(n, nmax);
-
-		for (j = 0; j < n; j += 2) {
-			/* Clear output mix*/
-			memset(cd->out, 0,  2 * out_nch * sizeof(int32_t));
-
-			/* Read two frames from all input channels
-			 * there won't be buffer overflow since we
-			 * set 2 frames align in tdfb_prepare function.
-			 */
-			for (i = 0; i < 2 * in_nch; i++) {
-				AE_L32_XP(d, x, inc);
-				cd->in[i] = AE_SLAI32(d, 8);
-				tdfb_direction_copy_emphasis(cd, in_nch, &emp_ch, cd->in[i]);
-			}
 
-			/* Run and mix all filters to their output channel */
-			for (i = 0; i < cfg->num_filters; i++) {
-				is = cd->input_channel_select[i];
-				is2 = is + in_nch;
-				om = cd->output_channel_mix[i];
+	for (j = 0; j < (frames >> 1); j++) {
+		/* Clear output mix*/
+		memset(cd->out, 0,  2 * out_nch * sizeof(int32_t));
 
-				/* Get filter instance */
-				f = &cd->fir[i];
-				shift = -f->out_shift;
+		/* Read two frames from all input channels */
+		fir_comp_setup_circular(source);
+		for (i = 0; i < 2 * in_nch; i++) {
+			AE_L32_XC(d, x, sizeof(int32_t));
+			cd->in[i] = AE_SLAI32(d, 8);
+			tdfb_direction_copy_emphasis(cd, in_nch, &emp_ch, cd->in[i]);
+		}
 
-				/* Compute FIR and mix as Q5.27*/
-				fir_core_setup_circular(f);
-				fir_32x16_2x_hifi3(f, cd->in[is], cd->in[is2], &y0, &y1,
-						   shift);
-				for (k = 0; k < out_nch; k++) {
-					if (om & 1) {
-						cd->out[k] += (int32_t)y0 >> 4;
-						cd->out[k + out_nch] +=
-							(int32_t)y1 >> 4;
-					}
-					om = om >> 1;
+		for (i = 0; i < cfg->num_filters; i++) {
+			is = cd->input_channel_select[i];
+			is2 = is + in_nch;
+			om = cd->output_channel_mix[i];
+
+			/* Get filter instance */
+			f = &cd->fir[i];
+			shift = -f->out_shift;
+
+			/* Compute FIR and mix as Q5.27*/
+			fir_core_setup_circular(f);
+			fir_32x16_2x_hifi3(f, cd->in[is], cd->in[is2], &y0, &y1,
+					   shift);
+			for (k = 0; k < out_nch; k++) {
+				if (om & 1) {
+					cd->out[k] += (int32_t)y0 >> 4;
+					cd->out[k + out_nch] +=
+						(int32_t)y1 >> 4;
 				}
+				om = om >> 1;
 			}
+		}
 
-			/* Write two frames of output. The values in out[] are shifted
-			 * left and saturated to convert to Q1.27. The values
-			 * are then rounded to 16 bit and converted to Q1.15 for
-			 * sink buffer. TODO: Could saturate four samples with
-			 * one AE_ROUND16X4F32SSYM() instruction.
-			 */
-			for (i = 0; i < 2 * out_nch; i++) {
-				d = AE_SRAI32(AE_SLAI32S(AE_SRAI32R(cd->out[i], 4), 8), 8);
-				AE_S32_L_XP(d, y, inc);
-			}
+		/* Write two frames of output. The values from out[] are first
+		 * rounded to Q5.23 format, then saturated to Q1.23, and
+		 * shifted by 8 to LSB side of the word before storing to sink.
+		 * TODO: Could shift etc. two samples simultaneously.
+		 */
+		fir_comp_setup_circular(sink);
+		for (i = 0; i < 2 * out_nch; i++) {
+			d = AE_SRAI32(AE_SLAI32S(AE_SRAI32R(cd->out[i], 4), 8),
+				      8);
+			AE_S32_L_XC(d, y, sizeof(int32_t));
 		}
-		remaining_frames -= n;
-		x = audio_stream_wrap(source, x);
-		y = audio_stream_wrap(sink, y);
 	}
 }
 #endif
@@ -216,66 +187,52 @@ void tdfb_fir_s32(struct tdfb_comp_data *cd, struct input_stream_buffer *bsource
 	int in_nch = audio_stream_get_channels(source);
 	int out_nch = audio_stream_get_channels(sink);
 	int emp_ch = 0;
-	int n, nmax;
-	int remaining_frames = frames;
-	const int inc = sizeof(ae_int32);
-
-	while (remaining_frames) {
-		nmax = audio_stream_frames_without_wrap(source, x);
-		n = MIN(remaining_frames, nmax);
-		nmax = audio_stream_frames_without_wrap(sink, y);
-		n = MIN(n, nmax);
-
-		for (j = 0; j < n; j += 2) {
-			/* Clear output mix*/
-			memset(cd->out, 0,  2 * out_nch * sizeof(int32_t));
 
-			/* Read two frames from all input channels
-			 * there won't be buffer overflow since we
-			 * set 2 frames align in tdfb_prepare function.
-			 */
-			for (i = 0; i < 2 * in_nch; i++) {
-				AE_L32_XC(d, x, inc);
-				cd->in[i] = d;
-				tdfb_direction_copy_emphasis(cd, in_nch, &emp_ch, cd->in[i]);
-			}
-
-			for (i = 0; i < cfg->num_filters; i++) {
-				is = cd->input_channel_select[i];
-				is2 = is + in_nch;
-				om = cd->output_channel_mix[i];
+	for (j = 0; j < (frames >> 1); j++) {
+		/* Clear output mix*/
+		memset(cd->out, 0,  2 * out_nch * sizeof(int32_t));
 
-				/* Get filter instance */
-				f = &cd->fir[i];
-				shift = -f->out_shift;
+		/* Read two frames from all input channels */
+		fir_comp_setup_circular(source);
+		for (i = 0; i < 2 * in_nch; i++) {
+			AE_L32_XC(d, x, sizeof(int32_t));
+			cd->in[i] = d;
+			tdfb_direction_copy_emphasis(cd, in_nch, &emp_ch, cd->in[i]);
+		}
 
-				/* Compute FIR and mix as Q5.27*/
-				fir_core_setup_circular(f);
-				fir_32x16_2x_hifi3(f, cd->in[is], cd->in[is2], &y0, &y1,
-						   shift);
-				for (k = 0; k < out_nch; k++) {
-					if (om & 1) {
-						cd->out[k] += (int32_t)y0 >> 4;
-						cd->out[k + out_nch] +=
-							(int32_t)y1 >> 4;
-					}
-					om = om >> 1;
+		for (i = 0; i < cfg->num_filters; i++) {
+			is = cd->input_channel_select[i];
+			is2 = is + in_nch;
+			om = cd->output_channel_mix[i];
+
+			/* Get filter instance */
+			f = &cd->fir[i];
+			shift = -f->out_shift;
+
+			/* Compute FIR and mix as Q5.27*/
+			fir_core_setup_circular(f);
+			fir_32x16_2x_hifi3(f, cd->in[is], cd->in[is2], &y0, &y1,
+					   shift);
+			for (k = 0; k < out_nch; k++) {
+				if (om & 1) {
+					cd->out[k] += (int32_t)y0 >> 4;
+					cd->out[k + out_nch] +=
+						(int32_t)y1 >> 4;
 				}
+				om = om >> 1;
 			}
+		}
 
-			/* Write two frames of output. In Q5.27 to Q1.31 conversion
-			 * rounding is not applicable so just shift left by 4 and
-			 * saturate. TODO: Could shift two samples with one
-			 * instruction.
-			 */
-			for (i = 0; i < 2 * out_nch; i++) {
-				d = AE_SLAI32S(cd->out[i], 4);
-				AE_S32_L_XP(d, y, inc);
-			}
+		/* Write two frames of output. In Q5.27 to Q1.31 conversion
+		 * rounding is not applicable so just shift left by 4 and
+		 * saturate. TODO: Could shift two samples with one
+		 * instruction.
+		 */
+		fir_comp_setup_circular(sink);
+		for (i = 0; i < 2 * out_nch; i++) {
+			d = AE_SLAI32S(cd->out[i], 4);
+			AE_S32_L_XC(d, y, sizeof(int32_t));
 		}
-		remaining_frames -= n;
-		x = audio_stream_wrap(source, x);
-		y = audio_stream_wrap(sink, y);
 	}
 }
 #endif

@@ -24,7 +24,7 @@
 #define TDFB_GENERIC	0
 #define TDFB_HIFIEP	1
 #define TDFB_HIFI3	0
-#elif XCHAL_HAVE_HIFI3 == 1 || XCHAL_HAVE_HIFI4 == 1
+#elif XCHAL_HAVE_HIFI3 == 1
 #define TDFB_HIFI3	1
 #define TDFB_HIFIEP	0
 #define TDFB_GENERIC	0