Skip to content

Commit 33fcded

Browse files
Audio: aec: optimize acoustic echo cancellation processing
This commit introduces performance optimizations to the Acoustic Echo Cancellation (AEC) implementation. The changes primarily restructure loops and memory copy operations to reduce the number of cycles consumed. Signed-off-by: shastry <malladi.sastry@intel.com>
1 parent 3681e09 commit 33fcded

File tree

2 files changed

+234
-87
lines changed

2 files changed

+234
-87
lines changed

src/audio/google/google_rtc_audio_processing.c

Lines changed: 167 additions & 68 deletions
Original file line number | Diff line number | Diff line change
@@ -42,6 +42,11 @@
4242
#define GOOGLE_RTC_AUDIO_PROCESSING_FREQENCY_TO_PERIOD_FRAMES 100
4343
#define GOOGLE_RTC_NUM_INPUT_PINS 2
4444
#define GOOGLE_RTC_NUM_OUTPUT_PINS 1
45+
#define ERR_INVALID_REF -1
46+
#define ERR_MEMCPY_FAIL -2
47+
#define ERR_INVALID_SRC -3
48+
#define ERR_INVALID_DST -4
49+
4550

4651
LOG_MODULE_REGISTER(google_rtc_audio_processing, CONFIG_SOF_LOG_LEVEL);
4752

@@ -791,7 +796,6 @@ static int google_rtc_audio_processing_process(struct processing_module *mod,
791796
size_t dst_buf_size;
792797

793798
size_t num_of_bytes_to_process;
794-
size_t channel;
795799
size_t buffer_offset;
796800

797801
struct sof_source *ref_stream, *src_stream;
@@ -822,23 +826,60 @@ static int google_rtc_audio_processing_process(struct processing_module *mod,
822826
/* 32float: de-interlace ref buffer, convert it to float, skip channels if > Max
823827
* 16int: linearize buffer, skip channels if > Max
824828
*/
829+
/* Reduce cycle waste by streamlining the inner loop,
830+
* converting from array indexing to pointer arithmetic,
831+
* and putting data copy verification outside the loop.
832+
*/
825833
buffer_offset = 0;
826-
for (int i = 0; i < cd->num_frames; i++) {
827-
for (channel = 0; channel < cd->num_aec_reference_channels; ++channel) {
828-
#if CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API
829-
cd->aec_reference_buffer_ptrs[channel][i] =
830-
convert_int16_to_float(ref[channel]);
831-
#else /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */
832-
cd->aec_reference_buffer[buffer_offset++] = ref[channel];
833-
#endif /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */
834+
const int16_t *ref_end = ref + cd->num_frames * cd->num_aec_reference_channels;
835+
836+
if ((void *)ref_end >= (void *)ref_buf_end)
837+
ref_end = (void *)ref_buf_start;
834838

839+
#if CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API
840+
float **ref_ptr = cd->aec_reference_buffer_ptrs;
841+
int s_chan;
842+
int i;
843+
844+
/* Loop over frames and channels, converting data from int16 to float */
845+
for (i = 0; i < cd->num_frames; ++i) {
846+
for (s_chan = 0; s_chan < cd->num_aec_reference_channels; ++s_chan) {
847+
/* Check that ref is within the valid range of the ref_buf buffer */
848+
if (ref && (void *)ref >= (void *)ref_buf_start &&
849+
(void *)ref < (void *)ref_buf_end)
850+
(*ref_ptr)[s_chan] = convert_int16_to_float(*ref++);
851+
else
852+
/* ref does not point to valid int16_t data */
853+
return ERR_INVALID_REF;
835854
}
855+
ref_ptr++;
856+
}
836857

837-
ref += cd->num_aec_reference_channels;
838-
if ((void *)ref >= (void *)ref_buf_end)
839-
ref = (void *)ref_buf_start;
858+
#else /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */
859+
int16_t *ref_buf = cd->aec_reference_buffer;
860+
861+
/* Check that ref is within the valid range of the ref_buf buffer */
862+
if (ref && (void *)ref >= (void *)ref_buf_start && (void *)ref < (void *)ref_buf_end) {
863+
/* Use memcpy_s to copy the data from ref buffer to ref_buf buffer until it reaches
864+
* ref_end
865+
* This assumes that the data in the ref buffer is contiguous
866+
*/
867+
size_t num_bytes = (ref_end - ref) * sizeof(*ref);
868+
869+
if (memcpy_s(ref_buf, num_bytes, ref, num_bytes) != 0) {
870+
/* Handle error */
871+
return -2;
872+
}
873+
/* Update the ref and ref_buf pointers */
874+
ref = ref_end;
875+
ref_buf += (ref_end - ref);
876+
} else {
877+
/* ref does not point to valid int16_t data */
878+
return ERR_MEMCPY_FAIL;
840879
}
841880

881+
#endif /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */
882+
842883
#if CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API
843884
GoogleRtcAudioProcessingAnalyzeRender_float32(cd->state,
844885
(const float **)
@@ -855,26 +896,57 @@ static int google_rtc_audio_processing_process(struct processing_module *mod,
855896
(const void **)&src_buf_start, &src_buf_size);
856897
assert(!ret);
857898
src_buf_end = src_buf_start + src_buf_size;
858-
899+
/* The second optimization eliminates the inner loop
900+
* and replaces it with pointer arithmetic for speedier access.
901+
* To reduce cycle waste, the data copy check is moved outside of the loop.
902+
*/
903+
/* Initialize error_code to 0 (no error) */
904+
int error_code = 0;
859905
buffer_offset = 0;
860-
for (int i = 0; i < cd->num_frames; i++) {
861-
for (channel = 0; channel < cd->num_capture_channels; channel++)
906+
const int16_t *src_end = src + cd->num_frames * cd->config.output_fmt.channels_count;
907+
908+
if ((void *)src_end >= (void *)src_buf_end)
909+
src_end = (void *)src_buf_start;
910+
862911
#if CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API
863-
cd->process_buffer_ptrs[channel][i] = convert_int16_to_float(src[channel]);
912+
float **proc_ptr = cd->process_buffer_ptrs;
913+
914+
/* Check for null pointers and buffer overflows */
915+
if (!src || !proc_ptr || src >= src_end)
916+
/* If there's an error, set error_code to ERR_INVALID_SRC but don't return yet */
917+
error_code = ERR_INVALID_SRC;
918+
else
919+
/* If there's no error, continue processing */
920+
while (src != src_end) {
921+
if ((void *)src >= (void *)src_buf_end)
922+
src = (void *)src_buf_start;
923+
924+
*proc_ptr++ = convert_int16_to_float(src++);
925+
}
926+
864927
#else /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */
865-
cd->process_buffer[buffer_offset++] = src[channel];
866-
#endif /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */
928+
int16_t *proc_buf = cd->process_buffer;
929+
930+
/* Check for null pointers and buffer overflows */
931+
if (!src || !proc_buf || src >= src_end)
932+
/* If there's an error, set error_code to ERR_INVALID_SRC but don't return yet */
933+
error_code = ERR_INVALID_SRC;
934+
else
935+
/* If there's no error, continue processing */
936+
while (src != src_end) {
937+
if ((void *)src >= (void *)src_buf_end)
938+
src = (void *)src_buf_start;
939+
940+
*proc_buf++ = *src++;
941+
}
867942

868-
/* move pointer to next frame
869-
* number of incoming channels may be < cd->num_capture_channels
870-
*/
871-
src += cd->config.output_fmt.channels_count;
872-
if ((void *)src >= (void *)src_buf_end)
873-
src = (void *)src_buf_start;
874-
}
943+
#endif /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */
875944

876945
source_release_data(src_stream, num_of_bytes_to_process);
877946

947+
/* Return the error code. If there was no error, this will be 0 */
948+
return error_code;
949+
878950
/* call the library, use same in/out buffers */
879951
#if CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API
880952
GoogleRtcAudioProcessingProcessCapture_float32(cd->state,
@@ -894,24 +966,33 @@ static int google_rtc_audio_processing_process(struct processing_module *mod,
894966

895967
/* process all channels in output stream */
896968
buffer_offset = 0;
897-
for (int i = 0; i < cd->num_frames; i++) {
898-
for (channel = 0; channel < cd->config.output_fmt.channels_count; channel++) {
899-
/* set data in processed channels, zeroize not processed */
900-
if (channel < cd->num_capture_channels)
969+
int16_t *dst_end = dst + cd->num_frames * cd->config.output_fmt.channels_count;
970+
971+
if ((void *)dst_end >= (void *)dst_buf_end)
972+
dst_end = (void *)dst_buf_start;
973+
901974
#if CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API
902-
dst[channel] = convert_float_to_int16(
903-
cd->process_buffer_ptrs[channel][i]);
975+
float **proc_ptr = cd->process_buffer_ptrs;
976+
977+
/* Check for null pointers and buffer overflows */
978+
if (!dst || !proc_ptr || dst >= dst_end || *proc_ptr >= *proc_ptr + cd->num_frames)
979+
return ERR_INVALID_DST;
980+
981+
while (dst != dst_end)
982+
*dst++ = convert_float_to_int16(*proc_ptr++);
983+
904984
#else /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */
905-
dst[channel] = cd->process_buffer[buffer_offset++];
906-
#endif /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */
907-
else
908-
dst[channel] = 0;
909-
}
985+
int16_t *process_buffer = cd->process_buffer;
910986

911-
dst += cd->config.output_fmt.channels_count;
912-
if ((void *)dst >= (void *)dst_buf_end)
913-
dst = (void *)dst_buf_start;
914-
}
987+
/* Check for null pointers and buffer overflows */
988+
if (!dst || !process_buffer || dst >= dst_end ||
989+
process_buffer >= process_buffer + cd->num_frames)
990+
return ERR_INVALID_DST;
991+
992+
while (dst != dst_end)
993+
*dst++ = *process_buffer++;
994+
995+
#endif /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */
915996

916997
sink_commit_buffer(dst_stream, num_of_bytes_to_process);
917998

@@ -928,6 +1009,8 @@ static int google_rtc_audio_processing_process(struct processing_module *mod,
9281009
int16_t *src, *dst, *ref;
9291010
uint32_t num_aec_reference_frames;
9301011
uint32_t num_aec_reference_bytes;
1012+
int ref_channels;
1013+
int aec_ref_product;
9311014
int num_samples_remaining;
9321015
int num_frames_remaining;
9331016
int channel;
@@ -950,25 +1033,33 @@ static int google_rtc_audio_processing_process(struct processing_module *mod,
9501033
ref_stream = ref_streamb->data;
9511034
ref = audio_stream_get_rptr(ref_stream);
9521035

1036+
/* Pre-calculate the number of channels in the reference stream for efficiency */
1037+
ref_channels = audio_stream_get_channels(ref_stream);
1038+
1039+
/* Pre-calculate the product of the number of AEC reference channels and the AEC
1040+
* reference frame index
1041+
*/
1042+
aec_ref_product = cd->num_aec_reference_channels * cd->aec_reference_frame_index;
1043+
9531044
num_aec_reference_frames = input_buffers[cd->aec_reference_source].size;
9541045
num_aec_reference_bytes = audio_stream_frame_bytes(ref_stream) * num_aec_reference_frames;
9551046

956-
num_samples_remaining = num_aec_reference_frames * audio_stream_get_channels(ref_stream);
1047+
num_samples_remaining = num_aec_reference_frames * ref_channels;
9571048
while (num_samples_remaining) {
9581049
nmax = audio_stream_samples_without_wrap_s16(ref_stream, ref);
9591050
n = MIN(num_samples_remaining, nmax);
9601051
for (i = 0; i < n; i += cd->num_aec_reference_channels) {
961-
j = cd->num_aec_reference_channels * cd->aec_reference_frame_index;
1052+
j = aec_ref_product;
9621053
for (channel = 0; channel < cd->num_aec_reference_channels; ++channel)
9631054
cd->aec_reference_buffer[j++] = ref[channel];
964-
965-
ref += audio_stream_get_channels(ref_stream);
1055+
ref += ref_channels;
9661056
++cd->aec_reference_frame_index;
967-
9681057
if (cd->aec_reference_frame_index == cd->num_frames) {
9691058
GoogleRtcAudioProcessingAnalyzeRender_int16(cd->state,
970-
cd->aec_reference_buffer);
1059+
cd->aec_reference_buffer);
9711060
cd->aec_reference_frame_index = 0;
1061+
/* Reset the product as the frame index is reset */
1062+
aec_ref_product = 0;
9721063
}
9731064
}
9741065
num_samples_remaining -= n;
@@ -984,6 +1075,8 @@ static int google_rtc_audio_processing_process(struct processing_module *mod,
9841075
src = audio_stream_get_rptr(mic_stream);
9851076
dst = audio_stream_get_wptr(out_stream);
9861077

1078+
/* Move out of loop */
1079+
int mic_stream_channels = audio_stream_get_channels(mic_stream);
9871080
frames = input_buffers[cd->raw_microphone_source].size;
9881081
num_frames_remaining = frames;
9891082

@@ -993,34 +1086,40 @@ static int google_rtc_audio_processing_process(struct processing_module *mod,
9931086
nmax = audio_stream_frames_without_wrap(out_stream, dst);
9941087
n = MIN(n, nmax);
9951088
for (i = 0; i < n; i++) {
996-
memcpy_s(&(cd->raw_mic_buffer[cd->raw_mic_buffer_frame_index *
997-
cd->num_capture_channels]),
998-
cd->num_frames * cd->num_capture_channels *
999-
sizeof(cd->raw_mic_buffer[0]), src,
1000-
sizeof(int16_t) * cd->num_capture_channels);
1001-
++cd->raw_mic_buffer_frame_index;
1002-
1003-
memcpy_s(dst, cd->num_frames * cd->num_capture_channels *
1004-
sizeof(cd->output_buffer[0]),
1005-
&(cd->output_buffer[cd->output_buffer_frame_index *
1006-
cd->num_capture_channels]),
1007-
sizeof(int16_t) * cd->num_capture_channels);
1008-
++cd->output_buffer_frame_index;
1009-
1010-
if (cd->raw_mic_buffer_frame_index == cd->num_frames) {
1011-
GoogleRtcAudioProcessingProcessCapture_int16(cd->state,
1012-
cd->raw_mic_buffer,
1013-
cd->output_buffer);
1014-
cd->output_buffer_frame_index = 0;
1015-
cd->raw_mic_buffer_frame_index = 0;
1089+
/* If we haven't filled the buffer yet, copy the data */
1090+
if (cd->raw_mic_buffer_frame_index < cd->num_frames) {
1091+
memcpy_s(&(cd->raw_mic_buffer[cd->raw_mic_buffer_frame_index *
1092+
cd->num_capture_channels]),
1093+
cd->num_frames * cd->num_capture_channels *
1094+
sizeof(cd->raw_mic_buffer[0]), src,
1095+
sizeof(int16_t) * cd->num_capture_channels);
1096+
++cd->raw_mic_buffer_frame_index;
1097+
}
1098+
1099+
if (cd->output_buffer_frame_index < cd->num_frames) {
1100+
memcpy_s(dst, cd->num_frames * cd->num_capture_channels *
1101+
sizeof(cd->output_buffer[0]),
1102+
&(cd->output_buffer[cd->output_buffer_frame_index *
1103+
cd->num_capture_channels]),
1104+
sizeof(int16_t) * cd->num_capture_channels);
1105+
++cd->output_buffer_frame_index;
10161106
}
10171107

1018-
src += audio_stream_get_channels(mic_stream);
1019-
dst += audio_stream_get_channels(out_stream);
1108+
src += mic_stream_channels;
1109+
dst += mic_stream_channels;
10201110
}
10211111
num_frames_remaining -= n;
10221112
src = audio_stream_wrap(mic_stream, src);
10231113
dst = audio_stream_wrap(out_stream, dst);
1114+
1115+
/* If we've filled the buffer, process the data */
1116+
if (cd->raw_mic_buffer_frame_index == cd->num_frames) {
1117+
GoogleRtcAudioProcessingProcessCapture_int16(cd->state,
1118+
cd->raw_mic_buffer,
1119+
cd->output_buffer);
1120+
cd->output_buffer_frame_index = 0;
1121+
cd->raw_mic_buffer_frame_index = 0;
1122+
}
10241123
}
10251124

10261125
module_update_buffer_position(&input_buffers[cd->raw_microphone_source],

0 commit comments

Comments
 (0)