diff --git a/src/audio/CMakeLists.txt b/src/audio/CMakeLists.txt index 8c3e00dd5752..6f8e83262b7e 100644 --- a/src/audio/CMakeLists.txt +++ b/src/audio/CMakeLists.txt @@ -125,6 +125,9 @@ if(NOT CONFIG_LIBRARY) if(CONFIG_COMP_ARIA) add_subdirectory(aria) endif() + if(CONFIG_COMP_UP_DOWN_MIXER) + add_subdirectory(up_down_mixer) + endif() subdirs(pipeline) return() diff --git a/src/audio/Kconfig b/src/audio/Kconfig index fd90d2dd2729..271bd7d5bda8 100644 --- a/src/audio/Kconfig +++ b/src/audio/Kconfig @@ -37,6 +37,20 @@ config COMP_VOLUME help Select for Volume component +config COMP_UP_DOWN_MIXER + bool "UP_DOWN_MIXER component" + default n + depends on IPC_MAJOR_4 + help + Select for Up Down Mixer component Conversions supported: + Up/Downmixing for stereo output: + 1, 2, 2.1, 3.0, 3.1, Quatro, 4.0, 5.0, 5.1, 7.1 -> 2 + Upmixing for multichannel output: + 1, 2 -> 5.1 + 2 -> 7.1 + Downmixing for mono output: + 4.0, Quatro, 3.1, 2 -> 1 + if COMP_VOLUME config COMP_VOLUME_WINDOWS_FADE diff --git a/src/audio/up_down_mixer/CMakeLists.txt b/src/audio/up_down_mixer/CMakeLists.txt new file mode 100644 index 000000000000..57e9e22e3ee4 --- /dev/null +++ b/src/audio/up_down_mixer/CMakeLists.txt @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: BSD-3-Clause + +add_local_sources(sof up_down_mixer.c) +add_local_sources(sof up_down_mixer_hifi3.c) diff --git a/src/audio/up_down_mixer/up_down_mixer.c b/src/audio/up_down_mixer/up_down_mixer.c new file mode 100644 index 000000000000..3621bc7b38ec --- /dev/null +++ b/src/audio/up_down_mixer/up_down_mixer.c @@ -0,0 +1,523 @@ +// SPDX-License-Identifier: BSD-3-Clause +// +// Copyright(c) 2022 Intel Corporation. All rights reserved. 
+//
+// Author: Bartosz Kokoszko
+// Author: Adrian Bonislawski
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+static const struct comp_driver comp_up_down_mixer;
+
+/* these ids aligns windows driver requirement to support windows driver */
+/* 42f8060c-832f-4dbf-b247-51e961997b34 */
+DECLARE_SOF_RT_UUID("up_down_mixer", up_down_mixer_comp_uuid, 0x42f8060c, 0x832f,
+		    0x4dbf, 0xb2, 0x47, 0x51, 0xe9, 0x61, 0x99, 0x7b, 0x34);
+
+DECLARE_TR_CTX(up_down_mixer_comp_tr, SOF_UUID(up_down_mixer_comp_uuid),
+	       LOG_LEVEL_INFO);
+
+/*
+ * Storage for a coefficient table supplied by the host at init time.
+ *
+ * The array is the destination of memcpy_s() in set_downmix_coefficients(),
+ * so it must be writable (it was declared const before, which made that
+ * write undefined behaviour). It is file-local and shared by every
+ * instance of the component: the last instance initialised with custom
+ * coefficients wins.
+ */
+static int32_t custom_coeffs[UP_DOWN_MIX_COEFFS_LENGTH];
+
+/*
+ * Select the coefficient table used by the mixing routines.
+ *
+ * dev                  - component device, its private data receives the table
+ * format               - input audio format (channel config and sample depth)
+ * out_channel_config   - requested output channel configuration
+ * downmix_coefficients - custom table of UP_DOWN_MIX_COEFFS_LENGTH entries,
+ *                        or NULL to pick one of the built-in tables
+ *
+ * Returns 0 on success, the memcpy_s() error if the custom table cannot be
+ * copied, or -EINVAL for an unsupported input channel configuration.
+ */
+static int set_downmix_coefficients(struct comp_dev *dev,
+				    const struct ipc4_audio_format *format,
+				    const enum ipc4_channel_config out_channel_config,
+				    const downmix_coefficients downmix_coefficients)
+{
+	struct up_down_mixer_data *cd = comp_get_drvdata(dev);
+	const bool is_16bit = format->depth == IPC4_DEPTH_16BIT;
+	int ret;
+
+	/*
+	 * Custom coefficients are requested through the argument. The private
+	 * data field is still zero at this point, so testing it (as the code
+	 * used to do) could never take this branch.
+	 */
+	if (downmix_coefficients) {
+		ret = memcpy_s(custom_coeffs, sizeof(custom_coeffs), downmix_coefficients,
+			       sizeof(int32_t) * UP_DOWN_MIX_COEFFS_LENGTH);
+		if (ret < 0)
+			return ret;
+
+		cd->downmix_coefficients = custom_coeffs;
+		return 0;
+	}
+
+	switch (format->ch_cfg) {
+	case IPC4_CHANNEL_CONFIG_MONO:
+	case IPC4_CHANNEL_CONFIG_STEREO:
+	case IPC4_CHANNEL_CONFIG_2_POINT_1:
+	case IPC4_CHANNEL_CONFIG_DUAL_MONO:
+		cd->downmix_coefficients = k_lo_ro_downmix32bit;
+		break;
+	case IPC4_CHANNEL_CONFIG_3_POINT_0:
+	case IPC4_CHANNEL_CONFIG_3_POINT_1:
+		cd->downmix_coefficients = is_16bit ?
+			k_half_scaled_lo_ro_downmix16bit :
+			k_half_scaled_lo_ro_downmix32bit;
+		break;
+	case IPC4_CHANNEL_CONFIG_QUATRO:
+		if (out_channel_config == IPC4_CHANNEL_CONFIG_MONO) {
+			cd->downmix_coefficients = is_16bit ?
+				k_quatro_mono_scaled_lo_ro_downmix16bit :
+				k_quatro_mono_scaled_lo_ro_downmix32bit;
+		} else { /* out_channel_config == IPC4_CHANNEL_CONFIG_STEREO */
+			cd->downmix_coefficients = is_16bit ?
+				k_half_scaled_lo_ro_downmix16bit :
+				k_half_scaled_lo_ro_downmix32bit;
+		}
+		break;
+	case IPC4_CHANNEL_CONFIG_4_POINT_0:
+		if (is_16bit)
+			cd->downmix_coefficients = k_scaled_lo_ro_downmix16bit;
+		else if (out_channel_config == IPC4_CHANNEL_CONFIG_5_POINT_1)
+			cd->downmix_coefficients = k_lo_ro_downmix32bit;
+		else
+			cd->downmix_coefficients = k_scaled_lo_ro_downmix32bit;
+		break;
+	case IPC4_CHANNEL_CONFIG_5_POINT_0:
+	case IPC4_CHANNEL_CONFIG_5_POINT_1:
+	case IPC4_CHANNEL_CONFIG_7_POINT_1:
+		cd->downmix_coefficients = k_scaled_lo_ro_downmix32bit;
+		break;
+	default:
+		comp_err(dev, "set_downmix_coefficients(): invalid channel config.");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * Pick the routine producing stereo output for the given input format.
+ *
+ * Returns NULL (after logging an error) when the input channel
+ * configuration has no stereo routine for the given sample depth; the
+ * caller treats NULL as an initialisation failure.
+ */
+static up_down_mixer_routine select_mix_out_stereo(struct comp_dev *dev,
+						   const struct ipc4_audio_format *format)
+{
+	if (format->depth == IPC4_DEPTH_16BIT) {
+		switch (format->ch_cfg) {
+		case IPC4_CHANNEL_CONFIG_MONO:
+			return shiftcopy16bit_mono;
+		case IPC4_CHANNEL_CONFIG_DUAL_MONO:
+		case IPC4_CHANNEL_CONFIG_STEREO:
+			return shiftcopy16bit_stereo;
+		case IPC4_CHANNEL_CONFIG_2_POINT_1:
+		case IPC4_CHANNEL_CONFIG_3_POINT_0:
+		case IPC4_CHANNEL_CONFIG_3_POINT_1:
+		case IPC4_CHANNEL_CONFIG_QUATRO:
+		case IPC4_CHANNEL_CONFIG_4_POINT_0:
+		case IPC4_CHANNEL_CONFIG_5_POINT_0:
+			return downmix16bit;
+		case IPC4_CHANNEL_CONFIG_5_POINT_1:
+			return downmix16bit_5_1;
+		case IPC4_CHANNEL_CONFIG_INVALID:
+		default:
+			/* No 16-bit routine for this layout: refuse it. */
+			comp_err(dev, "select_mix_out_stereo(): invalid channel config.");
+			return NULL;
+		}
+	}
+
+	switch (format->ch_cfg) {
+	case IPC4_CHANNEL_CONFIG_MONO:
+		return shiftcopy32bit_mono;
+	case IPC4_CHANNEL_CONFIG_DUAL_MONO:
+	case IPC4_CHANNEL_CONFIG_STEREO:
+		return shiftcopy32bit_stereo;
+	case IPC4_CHANNEL_CONFIG_2_POINT_1:
+		return downmix32bit_2_1;
+	case IPC4_CHANNEL_CONFIG_3_POINT_0:
+		return downmix32bit_3_0;
+	case IPC4_CHANNEL_CONFIG_3_POINT_1:
+		return downmix32bit_3_1;
+	case IPC4_CHANNEL_CONFIG_QUATRO:
+		return downmix32bit;
+	case IPC4_CHANNEL_CONFIG_4_POINT_0:
+		return downmix32bit_4_0;
+	case IPC4_CHANNEL_CONFIG_5_POINT_0:
+		/*
+		 * NOTE(review): a "_mono" routine is selected for stereo
+		 * output here. Kept as submitted; confirm this is intended.
+		 */
+		return downmix32bit_5_0_mono;
+	case IPC4_CHANNEL_CONFIG_5_POINT_1:
+		return downmix32bit_5_1;
+	case IPC4_CHANNEL_CONFIG_7_POINT_1:
+		return downmix32bit_7_1;
+	case IPC4_CHANNEL_CONFIG_INVALID:
+	default:
+		/* No 32-bit routine for this layout: refuse it. */
+		comp_err(dev, "select_mix_out_stereo(): invalid channel config.");
+		return NULL;
+	}
+}
+
+/*
+ * Pick the routine producing mono output for the given input format.
+ *
+ * Returns NULL (after logging an error) when the input channel
+ * configuration has no mono routine for the given sample depth.
+ */
+static up_down_mixer_routine select_mix_out_mono(struct comp_dev *dev,
+						 const struct ipc4_audio_format *format)
+{
+	if (format->depth == IPC4_DEPTH_16BIT) {
+		switch (format->ch_cfg) {
+		case IPC4_CHANNEL_CONFIG_STEREO:
+			return downmix16bit_stereo;
+		case IPC4_CHANNEL_CONFIG_3_POINT_1:
+		case IPC4_CHANNEL_CONFIG_QUATRO:
+		case IPC4_CHANNEL_CONFIG_4_POINT_0:
+			return downmix16bit_4ch_mono;
+		case IPC4_CHANNEL_CONFIG_INVALID:
+		default:
+			/* No 16-bit routine for this layout: refuse it. */
+			comp_err(dev, "select_mix_out_mono(): invalid channel config.");
+			return NULL;
+		}
+	}
+
+	switch (format->ch_cfg) {
+	case IPC4_CHANNEL_CONFIG_DUAL_MONO:
+	case IPC4_CHANNEL_CONFIG_STEREO:
+		return downmix32bit_stereo;
+	case IPC4_CHANNEL_CONFIG_3_POINT_1:
+		return downmix32bit_3_1_mono;
+	case IPC4_CHANNEL_CONFIG_QUATRO:
+		return downmix32bit_quatro_mono;
+	case IPC4_CHANNEL_CONFIG_4_POINT_0:
+		return downmix32bit_4_0_mono;
+	case IPC4_CHANNEL_CONFIG_5_POINT_0:
+		return downmix32bit_5_0_mono;
+	case IPC4_CHANNEL_CONFIG_5_POINT_1:
+		return downmix32bit_5_1_mono;
+	case IPC4_CHANNEL_CONFIG_7_POINT_1:
+		return downmix32bit_7_1_mono;
+	case IPC4_CHANNEL_CONFIG_INVALID:
+	default:
+		/* No 32-bit routine for this layout: refuse it. */
+		comp_err(dev, "select_mix_out_mono(): invalid channel config.");
+		return NULL;
+	}
+}
+
+/*
+ * Pick the routine producing 5.1 output for the given input format.
+ *
+ * Returns NULL (after logging an error) when the input channel
+ * configuration has no 5.1 routine for the given sample depth.
+ */
+static up_down_mixer_routine select_mix_out_5_1(struct comp_dev *dev,
+						const struct ipc4_audio_format *format)
+{
+	if (format->depth == IPC4_DEPTH_16BIT) {
+		switch (format->ch_cfg) {
+		case IPC4_CHANNEL_CONFIG_MONO:
+			return upmix16bit_1_to_5_1;
+		case IPC4_CHANNEL_CONFIG_STEREO:
+			return upmix16bit_2_0_to_5_1;
+		case IPC4_CHANNEL_CONFIG_INVALID:
+		default:
+			/* message used to name select_mix_out_mono() by mistake */
+			comp_err(dev, "select_mix_out_5_1(): invalid channel config.");
+			return NULL;
+		}
+	}
+
+	switch (format->ch_cfg) {
+	case IPC4_CHANNEL_CONFIG_MONO:
+		return upmix32bit_1_to_5_1;
+	case IPC4_CHANNEL_CONFIG_STEREO:
+		return upmix32bit_2_0_to_5_1;
+	case IPC4_CHANNEL_CONFIG_QUATRO:
+		return upmix32bit_quatro_to_5_1;
+	case IPC4_CHANNEL_CONFIG_4_POINT_0:
+		return upmix32bit_4_0_to_5_1;
+	case IPC4_CHANNEL_CONFIG_7_POINT_1:
+		return downmix32bit_7_1_to_5_1;
+	case IPC4_CHANNEL_CONFIG_INVALID:
+	default:
+		/* message used to name select_mix_out_mono() by mistake */
+		comp_err(dev, "select_mix_out_5_1(): invalid channel config.");
+		return NULL;
+	}
+}
+
+/*
+ * Configure the mixer for one input format / output layout pair.
+ *
+ * Stores the selected routine, the output format description, a copy of
+ * the input channel description and the coefficient table in the
+ * component private data.
+ *
+ * cd->mix_routine may be left NULL when the select_mix_out_*() helper
+ * rejects the input layout; the caller has to check it.
+ *
+ * Returns 0 on success, -EINVAL for a missing format or an unsupported
+ * combination, or the error from set_downmix_coefficients().
+ */
+static int init_mix(struct comp_dev *dev,
+		    const struct ipc4_audio_format *format,
+		    enum ipc4_channel_config out_channel_config,
+		    const downmix_coefficients downmix_coefficients)
+{
+	struct up_down_mixer_data *cd = comp_get_drvdata(dev);
+
+	if (!format)
+		return -EINVAL;
+
+	if (out_channel_config == IPC4_CHANNEL_CONFIG_MONO) {
+		/* Select down mixing routine. */
+		cd->mix_routine = select_mix_out_mono(dev, format);
+
+		/* Update audio format. */
+		cd->out_fmt[0].channels_count = 1;
+		cd->out_fmt[0].ch_cfg = IPC4_CHANNEL_CONFIG_MONO;
+		cd->out_fmt[0].ch_map = create_channel_map(IPC4_CHANNEL_CONFIG_MONO);
+
+	} else if (out_channel_config == IPC4_CHANNEL_CONFIG_STEREO) {
+		/* DOWN_MIX */
+		if (format->interleaving_style != IPC4_CHANNELS_INTERLEAVED ||
+		    format->depth == IPC4_DEPTH_8BIT)
+			return -EINVAL;
+
+		/* Select down mixing routine. */
+		cd->mix_routine = select_mix_out_stereo(dev, format);
+
+		/* Update audio format. */
+		cd->out_fmt[0].channels_count = 2;
+		cd->out_fmt[0].ch_cfg = IPC4_CHANNEL_CONFIG_STEREO;
+		cd->out_fmt[0].ch_map = create_channel_map(IPC4_CHANNEL_CONFIG_STEREO);
+
+	} else if (out_channel_config == IPC4_CHANNEL_CONFIG_5_POINT_1) {
+		/* Select mixing routine (up or down, depending on the input). */
+		cd->mix_routine = select_mix_out_5_1(dev, format);
+
+		/* Update audio format. */
+		cd->out_fmt[0].channels_count = 6;
+		cd->out_fmt[0].ch_cfg = IPC4_CHANNEL_CONFIG_5_POINT_1;
+		cd->out_fmt[0].ch_map = create_channel_map(IPC4_CHANNEL_CONFIG_5_POINT_1);
+
+	} else if (out_channel_config == IPC4_CHANNEL_CONFIG_7_POINT_1 &&
+		   format->ch_cfg == IPC4_CHANNEL_CONFIG_STEREO) {
+		/* Only a 32-bit stereo -> 7.1 routine exists; validate first. */
+		if (format->depth == IPC4_DEPTH_16BIT)
+			return -EINVAL;
+
+		/* Select up mixing routine. */
+		cd->mix_routine = upmix32bit_2_0_to_7_1;
+
+		/*
+		 * Update audio format. This branch used to leave the channel
+		 * fields untouched, unlike every other output layout.
+		 */
+		cd->out_fmt[0].channels_count = 8;
+		cd->out_fmt[0].ch_cfg = IPC4_CHANNEL_CONFIG_7_POINT_1;
+		cd->out_fmt[0].ch_map = create_channel_map(IPC4_CHANNEL_CONFIG_7_POINT_1);
+	} else {
+		return -EINVAL;
+	}
+
+	/* Update audio format. */
+	cd->out_fmt[0].valid_bit_depth = IPC4_DEPTH_24BIT;
+	cd->out_fmt[0].depth = IPC4_DEPTH_32BIT;
+
+	cd->in_channel_no = format->channels_count;
+	cd->in_channel_map = format->ch_map;
+	cd->in_channel_config = format->ch_cfg;
+
+	return set_downmix_coefficients(dev, format, out_channel_config,
+					downmix_coefficients);
+}
+
+/*
+ * Release the component: scratch buffers, private data and the device.
+ *
+ * Safe to call on a device whose private data was never allocated, which
+ * happens when creation fails early.
+ */
+static void up_down_mixer_free(struct comp_dev *dev)
+{
+	struct up_down_mixer_data *cd = comp_get_drvdata(dev);
+
+	if (cd) {
+		rfree(cd->buf_in);
+		rfree(cd->buf_out);
+		rfree(cd);
+	}
+
+	rfree(dev);
+}
+
+/*
+ * Allocate and fill the private data of a freshly allocated device.
+ *
+ * spec points to a struct ipc4_up_down_mixer_module_cfg.
+ *
+ * Ownership: this function never frees dev or anything attached to it.
+ * On failure the caller releases the device once, and that release also
+ * frees whatever was attached here. The previous code freed dev here and
+ * again in the caller, and logged through dev after freeing it.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int init_up_down_mixer(struct comp_dev *dev, struct comp_ipc_config *config, void *spec)
+{
+	struct ipc4_up_down_mixer_module_cfg *up_down_mixer = spec;
+	const struct ipc4_audio_format *fmt = &up_down_mixer->base_cfg.audio_fmt;
+	struct up_down_mixer_data *cd;
+	int ret;
+
+	dev->ipc_config = *config;
+	list_init(&dev->bsource_list);
+	list_init(&dev->bsink_list);
+
+	dcache_invalidate_region(spec, sizeof(*up_down_mixer));
+	cd = rzalloc(SOF_MEM_ZONE_RUNTIME, 0, SOF_MEM_CAPS_RAM, sizeof(*cd));
+	if (!cd)
+		return -ENOMEM;
+
+	comp_set_drvdata(dev, cd);
+
+	/* Copy received data format to local structures */
+	ret = memcpy_s(&cd->base, sizeof(struct ipc4_base_module_cfg),
+		       &up_down_mixer->base_cfg,
+		       sizeof(struct ipc4_base_module_cfg));
+	if (ret < 0)
+		return ret;
+
+	/* Linear scratch buffers, one input and one output block in size. */
+	cd->buf_in = rballoc(0, SOF_MEM_CAPS_RAM, cd->base.ibs);
+	cd->buf_out = rballoc(0, SOF_MEM_CAPS_RAM, cd->base.obs);
+	if (!cd->buf_in || !cd->buf_out)
+		return -ENOMEM;
+
+	switch (up_down_mixer->coefficients_select) {
+	case DEFAULT_COEFFICIENTS:
+		cd->out_channel_map = create_channel_map(up_down_mixer->out_channel_config);
+		ret = init_mix(dev, fmt, up_down_mixer->out_channel_config, NULL);
+		break;
+	case CUSTOM_COEFFICIENTS:
+		cd->out_channel_map = create_channel_map(up_down_mixer->out_channel_config);
+		ret = init_mix(dev, fmt, up_down_mixer->out_channel_config,
+			       up_down_mixer->coefficients);
+		break;
+	case DEFAULT_COEFFICIENTS_WITH_CHANNEL_MAP:
+		cd->out_channel_map = up_down_mixer->channel_map;
+		ret = init_mix(dev, fmt, up_down_mixer->out_channel_config, NULL);
+		break;
+	case CUSTOM_COEFFICIENTS_WITH_CHANNEL_MAP:
+		cd->out_channel_map = up_down_mixer->channel_map;
+		ret = init_mix(dev, fmt, up_down_mixer->out_channel_config,
+			       up_down_mixer->coefficients);
+		break;
+	default:
+		comp_err(dev, "init_up_down_mixer(): unsupported coefficient type");
+		return -EINVAL;
+	}
+
+	if (ret < 0) {
+		comp_err(dev, "init_up_down_mixer(): mix routine uninitialized.");
+		return ret;
+	}
+
+	if (!cd->mix_routine) {
+		comp_err(dev, "init_up_down_mixer(): mix routine uninitialized.");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * Component "create" operation.
+ *
+ * Returns the new device in COMP_STATE_READY, or NULL on any failure.
+ */
+static struct comp_dev *up_down_mixer_new(const struct comp_driver *drv,
+					  struct comp_ipc_config *config,
+					  void *spec)
+{
+	struct comp_dev *dev;
+	int ret;
+
+	comp_cl_info(&comp_up_down_mixer, "up_down_mixer_new()");
+
+	dev = comp_alloc(drv, sizeof(*dev));
+	if (!dev)
+		return NULL;
+
+	ret = init_up_down_mixer(dev, config, spec);
+	if (ret < 0) {
+		/* single release point for dev and everything attached to it */
+		comp_free(dev);
+		return NULL;
+	}
+	dev->state = COMP_STATE_READY;
+
+	return dev;
+}
+
+/*
+ * Component "prepare" operation.
+ *
+ * Returns 0 when the component is already active or was moved to the
+ * prepared state, PPL_STATUS_PATH_STOP when the state was already set,
+ * or the negative error from comp_set_state().
+ */
+static int up_down_mixer_prepare(struct comp_dev *dev)
+{
+	int ret;
+
+	if (dev->state == COMP_STATE_ACTIVE) {
+		comp_info(dev, "up_down_mixer_prepare(): Component is in active state.");
+		return 0;
+	}
+
+	ret = comp_set_state(dev, COMP_TRIGGER_PREPARE);
+	if (ret < 0)
+		return ret;
+
+	if (ret == COMP_STATUS_STATE_ALREADY_SET)
+		return PPL_STATUS_PATH_STOP;
+
+	return 0;
+}
+
+/* Component "reset" operation: nothing to undo, always succeeds. */
+static int up_down_mixer_reset(struct comp_dev *dev)
+{
+	return 0;
+}
+
+/*
+ * Component "copy" operation: mix one chunk from the source buffer into
+ * the sink buffer.
+ *
+ * Data is linearised into cd->buf_in, processed by the selected routine
+ * into cd->buf_out and then written to the sink stream. The chunk is
+ * limited to what the scratch buffers were sized for (ibs / obs bytes);
+ * anything left over stays in the source for the next call. Without that
+ * limit a large amount of available data would overrun the scratch
+ * buffers.
+ *
+ * Always returns 0.
+ */
+static int up_down_mixer_copy(struct comp_dev *dev)
+{
+	struct up_down_mixer_data *cd = comp_get_drvdata(dev);
+	struct comp_buffer *source;
+	struct comp_buffer *sink;
+	uint32_t source_frame_bytes, sink_frame_bytes;
+	uint32_t source_bytes, sink_bytes;
+	uint32_t avail_frames, free_frames;
+	uint32_t mix_frames;
+
+	comp_dbg(dev, "up_down_mixer_copy()");
+
+	source = list_first_item(&dev->bsource_list, struct comp_buffer,
+				 sink_list);
+	sink = list_first_item(&dev->bsink_list, struct comp_buffer,
+			       source_list);
+
+	source_frame_bytes = audio_stream_frame_bytes(&source->stream);
+	sink_frame_bytes = audio_stream_frame_bytes(&sink->stream);
+
+	avail_frames = audio_stream_get_avail_frames(&source->stream);
+	free_frames = audio_stream_get_free_frames(&sink->stream);
+	mix_frames = MIN(avail_frames, free_frames);
+
+	/* never move more than the linear scratch buffers can hold */
+	if (source_frame_bytes)
+		mix_frames = MIN(mix_frames, cd->base.ibs / source_frame_bytes);
+	if (sink_frame_bytes)
+		mix_frames = MIN(mix_frames, cd->base.obs / sink_frame_bytes);
+
+	source_bytes = mix_frames * source_frame_bytes;
+	sink_bytes = mix_frames * sink_frame_bytes;
+
+	if (!source_bytes)
+		return 0;
+
+	buffer_stream_invalidate(source, source_bytes);
+	audio_stream_copy_to_linear(&source->stream, 0, cd->buf_in, 0,
+				    source_bytes / audio_stream_sample_bytes(&source->stream));
+
+	cd->mix_routine(cd, (uint8_t *)cd->buf_in, source_bytes, (uint8_t *)cd->buf_out);
+
+	audio_stream_copy_from_linear(cd->buf_out, 0, &sink->stream, 0,
+				      sink_bytes / audio_stream_sample_bytes(&sink->stream));
+	buffer_stream_writeback(sink, sink_bytes);
+
+	comp_update_buffer_produce(sink, sink_bytes);
+	comp_update_buffer_consume(source, source_bytes);
+
+	return 0;
+}
+
+/* Component "trigger" operation: forwards the command to the state machine. */
+static int up_down_mixer_trigger(struct comp_dev *dev, int cmd)
+{
+	return comp_set_state(dev, cmd);
+}
+
+/* Driver descriptor: UUID, trace context and component operations. */
+static const struct comp_driver comp_up_down_mixer = {
+	.uid	= SOF_RT_UUID(up_down_mixer_comp_uuid),
+	.tctx	= &up_down_mixer_comp_tr,
+	.ops	= {
+		.create		= up_down_mixer_new,
+		.free		= up_down_mixer_free,
+		.trigger	= up_down_mixer_trigger,
+		.copy		= up_down_mixer_copy,
+		.prepare	= up_down_mixer_prepare,
+		.reset		= up_down_mixer_reset,
+	},
+};
+
+static SHARED_DATA struct comp_driver_info comp_up_down_mixer_info = {
+	.drv = &comp_up_down_mixer,
+};
+
+/* Register the driver with the component framework. */
+UT_STATIC void sys_comp_up_down_mixer_init(void)
+{
+	comp_register(platform_shared_get(&comp_up_down_mixer_info,
+					  sizeof(comp_up_down_mixer_info)));
+}
+
+DECLARE_MODULE(sys_comp_up_down_mixer_init);
diff --git a/src/audio/up_down_mixer/up_down_mixer_hifi3.c b/src/audio/up_down_mixer/up_down_mixer_hifi3.c
new file mode 100644
index 000000000000..f68eeb63a3af
--- /dev/null
+++ b/src/audio/up_down_mixer/up_down_mixer_hifi3.c
@@ -0,0 +1,1920 @@
+// SPDX-License-Identifier: BSD-3-Clause
+//
+// Copyright(c) 2022 Intel Corporation. All rights reserved.
+//
+// Author: Bartosz Kokoszko
+
+#include
+
+#if defined(__XCC__) && XCHAL_HAVE_HIFI3
+
+#include
+#include
+#include
+#include
+
+void upmix32bit_1_to_5_1(struct up_down_mixer_data *cd, const uint8_t * const in_data,
+			 const uint32_t in_size, uint8_t * const out_data)
+{
+	uint32_t i;
+
+	channel_map out_channel_map = cd->out_channel_map;
+
+	/* Only load the channel if it's present. 
*/
+	/*
+	 * NOTE(review): despite the comment above, no presence check is made
+	 * in this function; every slot returned by get_channel_location() is
+	 * used as-is to build a byte offset (slot * 4) into the first frame.
+	 */
+	ae_int32 *output_left = (ae_int32 *)(out_data +
+		(get_channel_location(out_channel_map, CHANNEL_LEFT) << 2));
+	ae_int32 *output_center = (ae_int32 *)(out_data +
+		(get_channel_location(out_channel_map, CHANNEL_CENTER) << 2));
+	ae_int32 *output_right = (ae_int32 *)(out_data +
+		(get_channel_location(out_channel_map, CHANNEL_RIGHT) << 2));
+	ae_int32 *output_left_surround = (ae_int32 *)(out_data +
+		(get_channel_location(out_channel_map, CHANNEL_LEFT_SURROUND) << 2));
+	ae_int32 *output_right_surround = (ae_int32 *)(out_data +
+		(get_channel_location(out_channel_map, CHANNEL_RIGHT_SURROUND) << 2));
+	ae_int32 *output_lfe = (ae_int32 *)(out_data +
+		(get_channel_location(out_channel_map, CHANNEL_LFE) << 2));
+
+	ae_int32 *in_ptr = (ae_int32 *)in_data;
+
+	/*
+	 * One 32-bit input word per frame, six output words per frame: the
+	 * input is copied to left, right and both surrounds, while center
+	 * and LFE are written as zero.
+	 */
+	for (i = 0; i < (in_size >> 2); ++i) {
+		output_left[i * 6] = in_ptr[i];
+		output_right[i * 6] = in_ptr[i];
+		output_center[i * 6] = 0;
+		output_left_surround[i * 6] = in_ptr[i];
+		output_right_surround[i * 6] = in_ptr[i];
+		output_lfe[i * 6] = 0;
+	}
+}
+
+/*
+ * Mono 16-bit input to 6-channel 32-bit output.
+ *
+ * cd       - supplies out_channel_map, used to locate each output slot
+ * in_data  - input samples, read as 16-bit words
+ * in_size  - input size in bytes (in_size >> 1 samples are processed)
+ * out_data - output frames of six 32-bit words each
+ */
+void upmix16bit_1_to_5_1(struct up_down_mixer_data *cd, const uint8_t * const in_data,
+			 const uint32_t in_size, uint8_t * const out_data)
+{
+	uint32_t i;
+
+	channel_map out_channel_map = cd->out_channel_map;
+
+	/*
+	 * Locate each output channel inside a frame (slot * 4 bytes).
+	 * NOTE(review): no presence check is made on the returned slots.
+	 */
+	ae_int32 *output_left = (ae_int32 *)(out_data +
+		(get_channel_location(out_channel_map, CHANNEL_LEFT) << 2));
+	ae_int32 *output_center = (ae_int32 *)(out_data +
+		(get_channel_location(out_channel_map, CHANNEL_CENTER) << 2));
+	ae_int32 *output_right = (ae_int32 *)(out_data +
+		(get_channel_location(out_channel_map, CHANNEL_RIGHT) << 2));
+	ae_int32 *output_left_surround = (ae_int32 *)(out_data +
+		(get_channel_location(out_channel_map, CHANNEL_LEFT_SURROUND) << 2));
+	ae_int32 *output_right_surround = (ae_int32 *)(out_data +
+		(get_channel_location(out_channel_map, CHANNEL_RIGHT_SURROUND) << 2));
+	ae_int32 *output_lfe = (ae_int32 *)(out_data +
+		(get_channel_location(out_channel_map, CHANNEL_LFE) << 2));
+
+	ae_int16 *in_ptr = (ae_int16 *)in_data;
+
+	/*
+	 * Each 16-bit sample is widened and shifted left by 16 before being
+	 * stored to left, right and both surrounds; center and LFE are zero.
+	 */
+	for (i = 0; i < (in_size >> 1); ++i) {
+		output_left[i * 6] = AE_MOVINT32_FROMINT16(in_ptr[i]) << 16;
+		output_right[i * 6] = AE_MOVINT32_FROMINT16(in_ptr[i]) << 16;
+		output_center[i * 6] = 0;
+		output_left_surround[i * 6] = AE_MOVINT32_FROMINT16(in_ptr[i]) << 16;
+		output_right_surround[i * 6] = AE_MOVINT32_FROMINT16(in_ptr[i]) << 16;
+		output_lfe[i * 6] = 0;
+	}
+}
+
+/*
+ * Stereo 32-bit input to 6-channel 32-bit output.
+ *
+ * cd       - supplies out_channel_map, used to locate each output slot
+ * in_data  - interleaved left/right 32-bit samples
+ * in_size  - input size in bytes (in_size >> 3 frames are processed)
+ * out_data - output frames of six 32-bit words each
+ */
+void upmix32bit_2_0_to_5_1(struct up_down_mixer_data *cd, const uint8_t * const in_data,
+			   const uint32_t in_size, uint8_t * const out_data)
+{
+	uint32_t i;
+
+	channel_map out_channel_map = cd->out_channel_map;
+
+	const uint8_t left_slot = get_channel_location(out_channel_map, CHANNEL_LEFT);
+	const uint8_t center_slot = get_channel_location(out_channel_map, CHANNEL_CENTER);
+	const uint8_t right_slot = get_channel_location(out_channel_map, CHANNEL_RIGHT);
+	uint8_t left_surround_slot = get_channel_location(out_channel_map, CHANNEL_LEFT_SURROUND);
+	uint8_t right_surround_slot = get_channel_location(out_channel_map, CHANNEL_RIGHT_SURROUND);
+	const uint8_t lfe_slot = get_channel_location(out_channel_map, CHANNEL_LFE);
+
+	/*
+	 * Must support also 5.1 Surround: when neither surround channel is
+	 * in the map, fall back to the side channel slots.
+	 */
+	if (left_surround_slot == CHANNEL_INVALID && right_surround_slot == CHANNEL_INVALID) {
+		left_surround_slot = get_channel_location(out_channel_map, CHANNEL_LEFT_SIDE);
+		right_surround_slot = get_channel_location(out_channel_map, CHANNEL_RIGHT_SIDE);
+	}
+
+	ae_int32 *output_left = (ae_int32 *)(out_data + (left_slot << 2));
+	ae_int32 *output_center = (ae_int32 *)(out_data + (center_slot << 2));
+	ae_int32 *output_right = (ae_int32 *)(out_data + (right_slot << 2));
+	ae_int32 *output_left_surround = (ae_int32 *)(out_data + (left_surround_slot << 2));
+	ae_int32 *output_right_surround = (ae_int32 *)(out_data + (right_surround_slot << 2));
+	ae_int32 *output_lfe = (ae_int32 *)(out_data + (lfe_slot << 2));
+
+	ae_int32 *in_left_ptr = (ae_int32 *)in_data;
+	ae_int32 *in_right_ptr = (ae_int32 *)(in_data + 4);
+
+	/* left feeds left + left surround, right feeds right + right surround */
+	for (i = 0; i < (in_size >> 3); ++i) {
+		output_left[i * 6] = in_left_ptr[i * 2];
+		output_right[i * 6] = in_right_ptr[i * 2];
+		output_center[i * 6] = 0;
+		output_left_surround[i * 6] = in_left_ptr[i * 2];
+		output_right_surround[i * 6] = in_right_ptr[i * 2];
+		output_lfe[i * 6] = 0;
+	}
+}
+
+/*
+ * Stereo 16-bit input to 6-channel 32-bit output.
+ *
+ * Same channel routing as upmix32bit_2_0_to_5_1(); each 16-bit sample is
+ * widened and shifted left by 16. in_size >> 2 frames are processed.
+ */
+void upmix16bit_2_0_to_5_1(struct up_down_mixer_data *cd, const uint8_t * const in_data,
+			   const uint32_t in_size, uint8_t * const out_data)
+{
+	uint32_t i;
+
+	channel_map out_channel_map = cd->out_channel_map;
+
+	const uint8_t left_slot = get_channel_location(out_channel_map, CHANNEL_LEFT);
+	const uint8_t center_slot = get_channel_location(out_channel_map, CHANNEL_CENTER);
+	const uint8_t right_slot = get_channel_location(out_channel_map, CHANNEL_RIGHT);
+	uint8_t left_surround_slot = get_channel_location(out_channel_map, CHANNEL_LEFT_SURROUND);
+	uint8_t right_surround_slot = get_channel_location(out_channel_map, CHANNEL_RIGHT_SURROUND);
+	const uint8_t lfe_slot = get_channel_location(out_channel_map, CHANNEL_LFE);
+
+	/*
+	 * Must support also 5.1 Surround: when neither surround channel is
+	 * in the map, fall back to the side channel slots.
+	 */
+	if (left_surround_slot == CHANNEL_INVALID && right_surround_slot == CHANNEL_INVALID) {
+		left_surround_slot = get_channel_location(out_channel_map, CHANNEL_LEFT_SIDE);
+		right_surround_slot = get_channel_location(out_channel_map, CHANNEL_RIGHT_SIDE);
+	}
+
+	ae_int32 *output_left = (ae_int32 *)(out_data + (left_slot << 2));
+	ae_int32 *output_center = (ae_int32 *)(out_data + (center_slot << 2));
+	ae_int32 *output_right = (ae_int32 *)(out_data + (right_slot << 2));
+	ae_int32 *output_left_surround = (ae_int32 *)(out_data + (left_surround_slot << 2));
+	ae_int32 *output_right_surround = (ae_int32 *)(out_data + (right_surround_slot << 2));
+	ae_int32 *output_lfe = (ae_int32 *)(out_data + (lfe_slot << 2));
+
+	ae_int16 *in_left_ptr = (ae_int16 *)in_data;
+	ae_int16 *in_right_ptr = (ae_int16 *)(in_data + 2);
+
+	for (i = 0; i < (in_size >> 2); ++i) {
+		output_left[i * 6] = AE_MOVINT32_FROMINT16(in_left_ptr[i * 2]) << 16;
+		output_right[i * 6] = AE_MOVINT32_FROMINT16(in_right_ptr[i * 2]) << 16;
+		output_center[i * 6] = 0;
+		output_left_surround[i * 6] = AE_MOVINT32_FROMINT16(in_left_ptr[i * 2]) << 16;
+		output_right_surround[i * 6] = AE_MOVINT32_FROMINT16(in_right_ptr[i * 2]) << 16;
+		output_lfe[i * 6] = 0;
+	}
+}
+
+/*
+ * Stereo 32-bit input to 8-channel 32-bit output.
+ *
+ * Left feeds left + left surround, right feeds right + right surround;
+ * center, LFE and both side channels are written as zero.
+ * in_size >> 3 frames are processed.
+ */
+void upmix32bit_2_0_to_7_1(struct up_down_mixer_data *cd, const uint8_t * const in_data,
+			   const uint32_t in_size, uint8_t * const out_data)
+{
+	uint32_t i;
+
+	channel_map out_channel_map = cd->out_channel_map;
+
+	/*
+	 * Locate each output channel inside a frame (slot * 4 bytes).
+	 * NOTE(review): no presence check is made on the returned slots.
+	 */
+	ae_int32 *output_left = (ae_int32 *)(out_data +
+		(get_channel_location(out_channel_map, CHANNEL_LEFT) << 2));
+	ae_int32 *output_center = (ae_int32 *)(out_data +
+		(get_channel_location(out_channel_map, CHANNEL_CENTER) << 2));
+	ae_int32 *output_right = (ae_int32 *)(out_data +
+		(get_channel_location(out_channel_map, CHANNEL_RIGHT) << 2));
+	ae_int32 *output_left_surround = (ae_int32 *)(out_data +
+		(get_channel_location(out_channel_map, CHANNEL_LEFT_SURROUND) << 2));
+	ae_int32 *output_right_surround = (ae_int32 *)(out_data +
+		(get_channel_location(out_channel_map, CHANNEL_RIGHT_SURROUND) << 2));
+	ae_int32 *output_lfe = (ae_int32 *)(out_data +
+		(get_channel_location(out_channel_map, CHANNEL_LFE) << 2));
+	ae_int32 *output_left_side = (ae_int32 *)(out_data +
+		(get_channel_location(out_channel_map, CHANNEL_LEFT_SIDE) << 2));
+	ae_int32 *output_right_side = (ae_int32 *)(out_data +
+		(get_channel_location(out_channel_map, CHANNEL_RIGHT_SIDE) << 2));
+
+	ae_int32 *in_left_ptr = (ae_int32 *)in_data;
+	ae_int32 *in_right_ptr = (ae_int32 *)(in_data + 4);
+
+	for (i = 0; i < (in_size >> 3); ++i) {
+		output_left[i * 8] = in_left_ptr[i * 2];
+		output_right[i * 8] = in_right_ptr[i * 2];
+		output_center[i * 8] = 0;
+		output_left_surround[i * 8] = in_left_ptr[i * 2];
+		output_right_surround[i * 8] = in_right_ptr[i * 2];
+		output_lfe[i * 8] = 0;
+		output_left_side[i * 8] = 0;
+		output_right_side[i * 8] = 0;
+	}
+}
+
+/*
+ * Copy in_size >> 2 elements from a single-lane 24-bit-fractional view of
+ * the input to a two-lane view of the output.
+ * NOTE(review): presumably this duplicates each mono sample into both
+ * stereo slots via the ae_p24f -> ae_p24x2f assignment; confirm against
+ * the HiFi type conversion rules. cd is not used.
+ */
+void shiftcopy32bit_mono(struct up_down_mixer_data *cd, const uint8_t * const in_data,
+			 const uint32_t in_size, uint8_t * const out_data)
+{
+	size_t i;
+
+	ae_p24f *in_ptr = (ae_p24f *)in_data;
+	ae_p24x2f *out_ptr = (ae_p24x2f *)out_data;
+
+	for (i = 0; i < (in_size >> 2); ++i)
+		out_ptr[i] = in_ptr[i];
+}
+
+/*
+ * Copy in_size >> 3 two-lane elements from input to output through the
+ * ae_p24x2f type. cd is not used.
+ */
+void shiftcopy32bit_stereo(struct up_down_mixer_data *cd, const uint8_t * const in_data,
+			   const uint32_t in_size, uint8_t * const out_data)
+{
+	uint32_t i;
+
+	ae_p24x2f *in_ptr = (ae_p24x2f *)in_data;
+	ae_p24x2f *out_ptr = (ae_p24x2f *)out_data;
+
+	for (i = 0; i < (in_size >> 3); ++i)
+		out_ptr[i] = in_ptr[i];
+}
+
+/*
+ * 2.1 (left, right, LFE) 32-bit input to stereo 32-bit output.
+ *
+ * Coefficients are read from cd->downmix_coefficients at byte offset
+ * CHANNEL_x << 2; input channel positions come from cd->in_channel_map.
+ * Output left is written at out_data, output right one word after it.
+ * Input pointers advance 12 bytes (three words) per frame, output
+ * pointers 8 bytes (two words).
+ */
+void downmix32bit_2_1(struct up_down_mixer_data *cd, const uint8_t * const in_data,
+		      const uint32_t in_size, uint8_t * const out_data)
+{
+	ae_int32x2 P_coefficient_left_right;
+	ae_int32x2 P_coefficient_lfe;
+
+	ae_int32x2 P_coefficient_left =
+		AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_LEFT << 2);
+	ae_int32x2 P_coefficient_right =
+		AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_RIGHT << 2);
+	ae_int32x2 P_coefficient_lfe_tmp =
+		AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_LFE << 2);
+
+	/* pack two coefficients per register; see the _LH / _LL multiplies below */
+	P_coefficient_left_right = AE_SEL32_LL(P_coefficient_left, P_coefficient_right);
+	P_coefficient_lfe = AE_SEL32_LL(P_coefficient_lfe_tmp, P_coefficient_lfe_tmp);
+
+	const ae_int32 *input_left = (ae_int32 *)(in_data +
+		(get_channel_location(cd->in_channel_map, CHANNEL_LEFT) << 2));
+	const ae_int32 *input_right = (ae_int32 *)(in_data +
+		(get_channel_location(cd->in_channel_map, CHANNEL_RIGHT) << 2));
+	const ae_int32 *input_lfe = (ae_int32 *)(in_data +
+		(get_channel_location(cd->in_channel_map, CHANNEL_LFE) << 2));
+
+	ae_int32 *output_left = (ae_int32 *)(out_data);
+	ae_int32 *output_right = (ae_int32 *)(out_data + sizeof(ae_int32));
+
+	ae_int32x2 P_input_left;
+	ae_int32x2 P_input_right;
+	ae_int32x2 P_input_lfe;
+
+	ae_int32x2 P_output_left;
+	ae_int32x2 P_output_right;
+
+	/* loop bound: input_left advanced by in_size bytes */
+	const ae_int32 *const end_input_left = input_left + (in_size / (sizeof(ae_int32)));
+
+	while (input_left < end_input_left) {
+		/* update output left channel based on input left channel */
+		AE_L32_IP(P_input_left, input_left, 12);
+		ae_f64 Q_tmp_left = AE_MULF32S_LH(P_input_left, P_coefficient_left_right);
+
+		/* update output right channel based on input right channel */
+		AE_L32_IP(P_input_right, input_right, 12);
+		ae_f64 Q_tmp_right = AE_MULF32S_LL(P_input_right, P_coefficient_left_right);
+
+		/* update output left and right channels based on input lfe channel */
+		AE_L32_IP(P_input_lfe, input_lfe, 12);
+		AE_MULAF32S_LL(Q_tmp_left, P_input_lfe, P_coefficient_lfe);
+		AE_MULAF32S_LL(Q_tmp_right, P_input_lfe, P_coefficient_lfe);
+
+		P_output_left = AE_ROUND32F64SSYM(Q_tmp_left);
+		P_output_right = AE_ROUND32F64SSYM(Q_tmp_right);
+
+		AE_S32_L_IP(P_output_left, output_left, 2 * 4);
+		AE_S32_L_IP(P_output_right, output_right, 2 * 4);
+	}
+}
+
+/*
+ * 3.0 (left, center, right) 32-bit input to stereo 32-bit output.
+ *
+ * Center contributes to both outputs with the same coefficient. Input
+ * pointers advance three words per frame, output pointers two words.
+ */
+void downmix32bit_3_0(struct up_down_mixer_data *cd, const uint8_t * const in_data,
+		      const uint32_t in_size, uint8_t * const out_data)
+{
+	ae_int32x2 P_coefficient_left_right;
+	ae_int32x2 P_coefficient_center;
+
+	ae_int32x2 P_coefficient_left =
+		AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_LEFT << 2);
+	ae_int32x2 P_coefficient_center_tmp =
+		AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_CENTER << 2);
+	ae_int32x2 P_coefficient_right =
+		AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_RIGHT << 2);
+
+	P_coefficient_left_right = AE_SEL32_LL(P_coefficient_left, P_coefficient_right);
+	P_coefficient_center = AE_SEL32_LL(P_coefficient_center_tmp, P_coefficient_center_tmp);
+
+	const ae_int32 *input_left = (ae_int32 *)(in_data +
+		(get_channel_location(cd->in_channel_map, CHANNEL_LEFT) << 2));
+	const ae_int32 *input_center = (ae_int32 *)(in_data +
+		(get_channel_location(cd->in_channel_map, CHANNEL_CENTER) << 2));
+	const ae_int32 *input_right = (ae_int32 *)(in_data +
+		(get_channel_location(cd->in_channel_map, CHANNEL_RIGHT) << 2));
+
+	ae_int32 *output_left = (ae_int32 *)(out_data);
+	ae_int32 *output_right = (ae_int32 *)(out_data + sizeof(ae_int32));
+
+	ae_int32x2 P_input_left;
+	ae_int32x2 P_input_center;
+	ae_int32x2 P_input_right;
+
+	ae_int32x2 P_output_left;
+	ae_int32x2 P_output_right;
+
+	const ae_int32 *const end_input_left = input_left + (in_size / (sizeof(ae_int32)));
+
+	while (input_left < end_input_left) {
+		ae_f64 Q_tmp_left;
+		ae_f64 Q_tmp_right;
+
+		/* update output left channel based on input left channel */
+		AE_L32_IP(P_input_left, input_left, 3 * sizeof(ae_int32));
+		Q_tmp_left = AE_MULF32S_LH(P_input_left, P_coefficient_left_right);
+
+		/* update output left and right channels based on input center channel */
+		AE_L32_IP(P_input_center, input_center, 3 * sizeof(ae_int32));
+		AE_MULAF32S_LH(Q_tmp_left, P_input_center, P_coefficient_center);
+		Q_tmp_right = AE_MULF32S_LH(P_input_center, P_coefficient_center);
+
+		/* update output right channel based on input right channel */
+		AE_L32_IP(P_input_right, input_right, 3 * sizeof(ae_int32));
+		AE_MULAF32S_LL(Q_tmp_right, P_input_right, P_coefficient_left_right);
+
+		P_output_left = AE_ROUND32F64SSYM(Q_tmp_left);
+		P_output_right = AE_ROUND32F64SSYM(Q_tmp_right);
+
+		AE_S32_L_IP(P_output_left, output_left, 2 * sizeof(ae_int32));
+		AE_S32_L_IP(P_output_right, output_right, 2 * sizeof(ae_int32));
+	}
+}
+
+/*
+ * 3.1 (left, center, right, LFE) 32-bit input to stereo 32-bit output.
+ *
+ * Center and LFE each contribute to both outputs. Input pointers advance
+ * four words per frame, output pointers two words.
+ */
+void downmix32bit_3_1(struct up_down_mixer_data *cd, const uint8_t * const in_data,
+		      const uint32_t in_size, uint8_t * const out_data)
+{
+	ae_int32x2 P_coefficient_left_right;
+	ae_int32x2 P_coefficient_center_lfe;
+
+	ae_int32x2 P_coefficient_left =
+		AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_LEFT << 2);
+	ae_int32x2 P_coefficient_center =
+		AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_CENTER << 2);
+	ae_int32x2 P_coefficient_right =
+		AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_RIGHT << 2);
+	ae_int32x2 P_coefficient_lfe =
+		AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_LFE << 2);
+
+	P_coefficient_left_right = AE_SEL32_LL(P_coefficient_left, P_coefficient_right);
+	P_coefficient_center_lfe = AE_SEL32_LL(P_coefficient_center, P_coefficient_lfe);
+
+	const ae_int32 *input_left = (ae_int32 *)(in_data +
+		(get_channel_location(cd->in_channel_map, CHANNEL_LEFT) << 2));
+	const ae_int32 *input_center = (ae_int32 *)(in_data +
+		(get_channel_location(cd->in_channel_map, CHANNEL_CENTER) << 2));
+	const ae_int32 *input_right = (ae_int32 *)(in_data +
+		(get_channel_location(cd->in_channel_map, CHANNEL_RIGHT) << 2));
+	const ae_int32 *input_lfe = (ae_int32 *)(in_data +
+		(get_channel_location(cd->in_channel_map, CHANNEL_LFE) << 2));
+
+	ae_int32 *output_left = (ae_int32 *)(out_data);
+	ae_int32 *output_right = (ae_int32 *)(out_data + sizeof(ae_int32));
+
+	ae_int32x2 P_input_left;
+	ae_int32x2 P_input_center;
+	ae_int32x2 P_input_right;
+	ae_int32x2 P_input_lfe;
+
+	ae_int32x2 P_output_left;
+	ae_int32x2 P_output_right;
+
+	const ae_int32 *const end_input_left = input_left + (in_size / (sizeof(ae_int32)));
+
+	while (input_left < end_input_left) {
+		ae_f64 Q_tmp_left;
+		ae_f64 Q_tmp_right;
+
+		/* left input -> left output */
+		AE_L32_IP(P_input_left, input_left, 4 * sizeof(ae_int32));
+		Q_tmp_left = AE_MULF32S_LH(P_input_left, P_coefficient_left_right);
+
+		/* center input -> both outputs */
+		AE_L32_IP(P_input_center, input_center, 4 * sizeof(ae_int32));
+		AE_MULAF32S_LH(Q_tmp_left, P_input_center, P_coefficient_center_lfe);
+		Q_tmp_right = AE_MULF32S_LH(P_input_center, P_coefficient_center_lfe);
+
+		/* right input -> right output */
+		AE_L32_IP(P_input_right, input_right, 4 * sizeof(ae_int32));
+		AE_MULAF32S_LL(Q_tmp_right, P_input_right, P_coefficient_left_right);
+
+		/* LFE input -> both outputs */
+		AE_L32_IP(P_input_lfe, input_lfe, 4 * sizeof(ae_int32));
+		AE_MULAF32S_LL(Q_tmp_left, P_input_lfe, P_coefficient_center_lfe);
+		AE_MULAF32S_LL(Q_tmp_right, P_input_lfe, P_coefficient_center_lfe);
+
+		P_output_left = AE_ROUND32F64SSYM(Q_tmp_left);
+		P_output_right = AE_ROUND32F64SSYM(Q_tmp_right);
+
+		AE_S32_L_IP(P_output_left, output_left, 2 * sizeof(ae_int32));
+		AE_S32_L_IP(P_output_right, output_right, 2 * sizeof(ae_int32));
+	}
+}
+
+void downmix32bit(struct up_down_mixer_data *cd, const uint8_t * const in_data,
+		  const uint32_t in_size, uint8_t * const out_data)
+{
+	uint32_t i;
+
+	ae_int32x2 P_coefficient_left_right;
+	ae_int32x2 P_coefficient_left_s_right_s;
+	ae_int32x2 P_coefficient_center_lfe;
+
+	/* Load the downmix coefficients. */
+	ae_int32x2 P_coefficient_left =
+		AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_LEFT << 2);
+	ae_int32x2 P_coefficient_center =
+		AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_CENTER << 2);
+	ae_int32x2 P_coefficient_right =
+		AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_RIGHT << 2);
+	ae_int32x2 P_coefficient_left_surround =
+		AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_LEFT_SURROUND << 2);
+	ae_int32x2 P_coefficient_right_surround =
+		AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_RIGHT_SURROUND << 2);
+	ae_int32x2 P_coefficient_lfe =
+		AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_LFE << 2);
+
+	/*
+	 * We have 6 coefficients (constant inside loop), 6 channels and only 8
+	 * AE_P registers. But each of those registers is 48bit or 2x24bit wide.
+	 * Also many Hifi2 operations can use either lower or higher 24 bits from
+	 * those registers. By combining 6 coefficients in pairs we save 3 AE_P
+	 * registers.
+	 */
+	P_coefficient_left_right = AE_SEL32_LL(P_coefficient_left, P_coefficient_right);
+	P_coefficient_left_s_right_s = AE_SEL32_LL(P_coefficient_left_surround,
+						   P_coefficient_right_surround);
+	P_coefficient_center_lfe = AE_SEL32_LL(P_coefficient_center, P_coefficient_lfe);
+
+	/* See what channels are available. */
+	bool left = (get_channel_location(cd->in_channel_map, CHANNEL_LEFT) != 0xF);
+	bool center = (get_channel_location(cd->in_channel_map, CHANNEL_CENTER) != 0xF);
+	bool right = (get_channel_location(cd->in_channel_map, CHANNEL_RIGHT) != 0xF);
+	bool left_surround =
+		(get_channel_location(cd->in_channel_map, CHANNEL_LEFT_SURROUND) != 0xF);
+	bool right_surround =
+		(get_channel_location(cd->in_channel_map, CHANNEL_RIGHT_SURROUND) != 0xF);
+	bool lfe = (get_channel_location(cd->in_channel_map, CHANNEL_LFE) != 0xF);
+
+	/* Downmixer single load. 
*/ + ae_int32 *input_left = NULL; + ae_int32 *input_center = NULL; + ae_int32 *input_right = NULL; + ae_int32 *input_left_surround = NULL; + ae_int32 *input_right_surround = NULL; + ae_int32 *input_lfe = NULL; + + /* Only load the channel if it's present. */ + if (left) { + input_left = (ae_int32 *)(in_data + + (get_channel_location(cd->in_channel_map, CHANNEL_LEFT) << 2)); + } + + if (center) { + input_center = (ae_int32 *)(in_data + + (get_channel_location(cd->in_channel_map, CHANNEL_CENTER) << 2)); + } + if (right) { + input_right = (ae_int32 *)(in_data + + (get_channel_location(cd->in_channel_map, CHANNEL_RIGHT) << 2)); + } + if (left_surround) { + input_left_surround = (ae_int32 *)(in_data + + (get_channel_location(cd->in_channel_map, CHANNEL_LEFT_SURROUND) << 2)); + } + if (right_surround) { + input_right_surround = (ae_int32 *)(in_data + + (get_channel_location(cd->in_channel_map, CHANNEL_RIGHT_SURROUND) << 2)); + } + if (lfe) { + input_lfe = (ae_int32 *)(in_data + + (get_channel_location(cd->in_channel_map, CHANNEL_LFE) << 2)); + } + + /** Calculate number of samples in a single channel. */ + uint32_t number_of_samples_in_one_channel = in_size / cd->in_channel_no; + + number_of_samples_in_one_channel >>= 2; + + /* Downmixer single store. */ + ae_int32 *output_left = (ae_int32 *)(out_data); + ae_int32 *output_right = (ae_int32 *)(out_data + sizeof(ae_int32)); + + /* We will be using P & Q registers. */ + ae_int32x2 P_input_left; + ae_int32x2 P_input_center; + ae_int32x2 P_input_right; + ae_int32x2 P_input_left_surround; + ae_int32x2 P_input_right_surround; + ae_int32x2 P_input_lfe; + + ae_int32x2 P_output_left; + ae_int32x2 P_output_right; + /* + * Calculating this outside of the loop is significant performance + * improvement. The value of this expression was reevaluated in each + * * iteration when placed inside loop. 
+ */ + int sample_offset = cd->in_channel_no << 2; + + for (i = 0; i < number_of_samples_in_one_channel; i++) { + /* Zero-out the Q register first. */ + ae_f64 Q_tmp_left = AE_ZERO64(); + ae_f64 Q_tmp_right = AE_ZERO64(); + + /* Load one 24-bit value, replicate in two elements of register P. */ + if (left) { + P_input_left = AE_L32_X(input_left, i * sample_offset); + AE_MULAF32S_LH(Q_tmp_left, P_input_left, P_coefficient_left_right); + } + if (center) { + P_input_center = AE_L32_X(input_center, i * sample_offset); + AE_MULAF32S_LH(Q_tmp_left, P_input_center, P_coefficient_center_lfe); + AE_MULAF32S_LH(Q_tmp_right, P_input_center, P_coefficient_center_lfe); + } + if (right) { + P_input_right = AE_L32_X(input_right, i * sample_offset); + AE_MULAF32S_LL(Q_tmp_right, P_input_right, P_coefficient_left_right); + } + if (left_surround) { + P_input_left_surround = AE_L32_X(input_left_surround, i * sample_offset); + AE_MULAF32S_LH(Q_tmp_left, P_input_left_surround, + P_coefficient_left_s_right_s); + + if (cd->in_channel_config == IPC4_CHANNEL_CONFIG_4_POINT_0) { + AE_MULAF32S_LH(Q_tmp_right, + P_input_left_surround, + P_coefficient_left_s_right_s); + } + } + if (right_surround) { + P_input_right_surround = + AE_L32_X(input_right_surround, i * sample_offset); + AE_MULAF32S_LL(Q_tmp_right, P_input_right_surround, + P_coefficient_left_s_right_s); + } + if (lfe) { + P_input_lfe = AE_L32_X(input_lfe, i * sample_offset); + AE_MULAF32S_LL(Q_tmp_left, P_input_lfe, P_coefficient_center_lfe); + AE_MULAF32S_LL(Q_tmp_right, P_input_lfe, P_coefficient_center_lfe); + } + + P_output_left = AE_ROUND32F64SSYM(Q_tmp_left); + P_output_right = AE_ROUND32F64SSYM(Q_tmp_right); + + output_left[i * 2] = P_output_left; + output_right[i * 2] = P_output_right; + } +} + +void downmix32bit_4_0(struct up_down_mixer_data *cd, const uint8_t * const in_data, + const uint32_t in_size, uint8_t * const out_data) +{ + ae_int32x2 P_coefficient_left_right; + ae_int32x2 P_coefficient_left_s_right_s; + ae_int32x2 
P_coefficient_center_lfe; + + ae_int32x2 P_coefficient_left = + AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_LEFT << 2); + ae_int32x2 P_coefficient_center = + AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_CENTER << 2); + ae_int32x2 P_coefficient_right = + AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_RIGHT << 2); + ae_int32x2 P_coefficient_left_surround = + AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_LEFT_SURROUND << 2); + ae_int32x2 P_coefficient_right_surround = + AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_RIGHT_SURROUND << 2); + ae_int32x2 P_coefficient_lfe = + AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_LFE << 2); + P_coefficient_left_right = AE_SEL32_LL(P_coefficient_left, P_coefficient_right); + P_coefficient_left_s_right_s = AE_SEL32_LL(P_coefficient_left_surround, + P_coefficient_right_surround); + P_coefficient_center_lfe = AE_SEL32_LL(P_coefficient_center, P_coefficient_lfe); + + const ae_int32 *input_left = (ae_int32 *)(in_data + + (get_channel_location(cd->in_channel_map, CHANNEL_LEFT) << 2)); + const ae_int32 *input_center = (ae_int32 *)(in_data + + (get_channel_location(cd->in_channel_map, CHANNEL_CENTER) << 2)); + const ae_int32 *input_right = (ae_int32 *)(in_data + + (get_channel_location(cd->in_channel_map, CHANNEL_RIGHT) << 2)); + const ae_int32 *input_left_surround = (ae_int32 *)(in_data + + (get_channel_location(cd->in_channel_map, CHANNEL_LEFT_SURROUND) << 2)); + + ae_int32 *output_left = (ae_int32 *)(out_data); + ae_int32 *output_right = (ae_int32 *)(out_data + sizeof(ae_int32)); + + ae_int32x2 P_input_left; + ae_int32x2 P_input_center; + ae_int32x2 P_input_right; + ae_int32x2 P_input_left_surround; + + ae_int32x2 P_output_left; + ae_int32x2 P_output_right; + + const ae_int32 *const end_input_left = input_left + (in_size / (sizeof(ae_int32))); + + while (input_left < end_input_left) { + ae_f64 Q_tmp_left; + ae_f64 Q_tmp_right; + + AE_L32_IP(P_input_left, input_left, 4 * 
sizeof(ae_int32)); + Q_tmp_left = AE_MULF32S_LH(P_input_left, P_coefficient_left_right); + + AE_L32_IP(P_input_center, input_center, 4 * sizeof(ae_int32)); + AE_MULAF32S_LH(Q_tmp_left, P_input_center, P_coefficient_center_lfe); + Q_tmp_right = AE_MULF32S_LH(P_input_center, P_coefficient_center_lfe); + + AE_L32_IP(P_input_right, input_right, 4 * sizeof(ae_int32)); + AE_MULAF32S_LL(Q_tmp_right, P_input_right, P_coefficient_left_right); + + /* For 4.0 the left surround input is propagated to both the left and right output channels. */ + AE_L32_IP(P_input_left_surround, input_left_surround, 4 * sizeof(ae_int32)); + AE_MULAF32S_LH(Q_tmp_left, P_input_left_surround, P_coefficient_left_s_right_s); + AE_MULAF32S_LH(Q_tmp_right, P_input_left_surround, P_coefficient_left_s_right_s); + + P_output_left = AE_ROUND32F64SSYM(Q_tmp_left); + P_output_right = AE_ROUND32F64SSYM(Q_tmp_right); + + AE_S32_L_IP(P_output_left, output_left, 2 * sizeof(ae_int32)); + AE_S32_L_IP(P_output_right, output_right, 2 * sizeof(ae_int32)); + } +} +/* + * Downmix interleaved 32-bit input to a single 32-bit output sample per + * frame, using the left, center, right and center-surround inputs and the + * matching entries of cd->downmix_coefficients. + */ +void downmix32bit_5_0_mono(struct up_down_mixer_data *cd, const uint8_t * const in_data, + const uint32_t in_size, uint8_t * const out_data) +{ + size_t i; + + ae_int32x2 P_coefficient_left_right; + ae_int32x2 P_coefficient_center_cs; + + ae_int32x2 P_coefficient_left = + AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_LEFT << 2); + ae_int32x2 P_coefficient_center = + AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_CENTER << 2); + ae_int32x2 P_coefficient_right = + AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_RIGHT << 2); + ae_int32x2 P_coefficient_cs = + AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_CENTER_SURROUND << 2); + + P_coefficient_left_right = AE_SEL32_LL(P_coefficient_left, P_coefficient_right); + P_coefficient_center_cs = AE_SEL32_LL(P_coefficient_center, P_coefficient_cs); + + const ae_int32 *input_left = (ae_int32 *)(in_data + + (get_channel_location(cd->in_channel_map, CHANNEL_LEFT) << 2)); + const ae_int32 *input_center = 
(ae_int32 *)(in_data + + (get_channel_location(cd->in_channel_map, CHANNEL_CENTER) << 2)); + const ae_int32 *input_right = (ae_int32 *)(in_data + + (get_channel_location(cd->in_channel_map, CHANNEL_RIGHT) << 2)); + const ae_int32 *input_cs = (ae_int32 *)(in_data + + (get_channel_location(cd->in_channel_map, CHANNEL_CENTER_SURROUND) << 2)); + + ae_int32 *output = (ae_int32 *)(out_data); + + ae_int32x2 P_input_left; + ae_int32x2 P_input_center; + ae_int32x2 P_input_right; + ae_int32x2 P_input_cs; + + ae_int32x2 P_output; + + const uint32_t channel_no = 5; + + for (i = 0; i < in_size / (sizeof(ae_int32)); i += channel_no) { + ae_f64 Q_tmp; + + AE_L32_IP(P_input_left, input_left, 5 * sizeof(ae_int32)); + Q_tmp = AE_MULF32S_LH(P_input_left, P_coefficient_left_right); + + AE_L32_IP(P_input_center, input_center, 5 * sizeof(ae_int32)); + AE_MULAF32S_LH(Q_tmp, P_input_center, P_coefficient_center_cs); + + AE_L32_IP(P_input_right, input_right, 5 * sizeof(ae_int32)); + AE_MULAF32S_LL(Q_tmp, P_input_right, P_coefficient_left_right); + + AE_L32_IP(P_input_cs, input_cs, 5 * sizeof(ae_int32)); + AE_MULAF32S_LL(Q_tmp, P_input_cs, P_coefficient_center_cs); + + P_output = AE_ROUND32F64SSYM(Q_tmp); + + AE_S32_L_IP(P_output, output, sizeof(ae_int32)); + } +} + +void downmix32bit_5_1(struct up_down_mixer_data *cd, const uint8_t * const in_data, + const uint32_t in_size, uint8_t * const out_data) +{ + const uint8_t left_slot = get_channel_location(cd->in_channel_map, CHANNEL_LEFT); + const uint8_t center_slot = get_channel_location(cd->in_channel_map, CHANNEL_CENTER); + const uint8_t right_slot = get_channel_location(cd->in_channel_map, CHANNEL_RIGHT); + uint8_t left_surround_slot = get_channel_location(cd->in_channel_map, CHANNEL_LEFT_SURROUND); + uint8_t right_surround_slot = get_channel_location(cd->in_channel_map, CHANNEL_RIGHT_SURROUND); + const uint8_t lfe_slot = get_channel_location(cd->in_channel_map, CHANNEL_LFE); + + /* Must support also 5.1 Surround */ + const bool 
surround_5_1_channel_map = (left_surround_slot == CHANNEL_INVALID) && + (right_surround_slot == CHANNEL_INVALID); + + if (surround_5_1_channel_map) { + left_surround_slot = get_channel_location(cd->in_channel_map, CHANNEL_LEFT_SIDE); + right_surround_slot = get_channel_location(cd->in_channel_map, CHANNEL_RIGHT_SIDE); + } + + ae_int32x2 P_coefficient_left_right; + ae_int32x2 P_coefficient_left_s_right_s; + ae_int32x2 P_coefficient_center_lfe; + + /* Load the downmix coefficients. */ + ae_int32x2 P_coefficient_left = + AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_LEFT << 2); + ae_int32x2 P_coefficient_center = + AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_CENTER << 2); + ae_int32x2 P_coefficient_right = + AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_RIGHT << 2); + ae_int32x2 P_coefficient_left_surround = + AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_LEFT_SURROUND << 2); + ae_int32x2 P_coefficient_right_surround = + AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_RIGHT_SURROUND << 2); + ae_int32x2 P_coefficient_lfe = + AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_LFE << 2); + + if (surround_5_1_channel_map) { + P_coefficient_left_surround = AE_L32_X((ae_int32 *)cd->downmix_coefficients, + CHANNEL_LEFT_SIDE << 2); + P_coefficient_right_surround = AE_L32_X((ae_int32 *)cd->downmix_coefficients, + CHANNEL_RIGHT_SIDE << 2); + } + + /* + * We have 6 coefficients (constant inside loop), 6 channels and only 8 + * AE_P registers. But each of those registers is 48bit or 2x24bit wide. + * Also many Hifi2 operations can use either lower or higher 24 bits from + * those registers. By combining 6 coefficients in pairs we save 3 AE_P + * registers. 
+ */ + P_coefficient_left_right = AE_SEL32_LL(P_coefficient_left, P_coefficient_right); + P_coefficient_left_s_right_s = AE_SEL32_LL(P_coefficient_left_surround, + P_coefficient_right_surround); + P_coefficient_center_lfe = AE_SEL32_LL(P_coefficient_center, P_coefficient_lfe); + + const ae_int32 *input_left = (ae_int32 *)(in_data + (left_slot << 2)); + const ae_int32 *input_center = (ae_int32 *)(in_data + (center_slot << 2)); + const ae_int32 *input_right = (ae_int32 *)(in_data + (right_slot << 2)); + const ae_int32 *input_left_surround = (ae_int32 *)(in_data + (left_surround_slot << 2)); + const ae_int32 *input_right_surround = (ae_int32 *)(in_data + (right_surround_slot << 2)); + const ae_int32 *input_lfe = (ae_int32 *)(in_data + (lfe_slot << 2)); + + /* Interleaved stereo output: right samples start one ae_int32 after left. */ + ae_int32 *output_left = (ae_int32 *)(out_data); + ae_int32 *output_right = (ae_int32 *)(out_data + sizeof(ae_int32)); + + /* We will be using P & Q registers. */ + ae_int32x2 P_input_left; + ae_int32x2 P_input_center; + ae_int32x2 P_input_right; + ae_int32x2 P_input_left_surround; + ae_int32x2 P_input_right_surround; + ae_int32x2 P_input_lfe; + + ae_int32x2 P_output_left; + ae_int32x2 P_output_right; + + /* + * The accumulators do not need to be re-initialized in the loop body: + * the first multiplication of each iteration is non-accumulating and + * overwrites them, which is more efficient. + * The generic (non 5.1) version cannot use this optimization because it + * does not know which channel is present and therefore which + * multiplication should reset the output accumulators. + */ + ae_f64 Q_tmp_left = AE_ZERO64(); + ae_f64 Q_tmp_right = AE_ZERO64(); + + const ae_int32 *const end_input_left = input_left + (in_size / (sizeof(ae_int32))); + + while (input_left < end_input_left) { + /* Load one 24-bit value, replicate in two elements of register P.
*/ + AE_L32_IP(P_input_center, input_center, 6 * sizeof(ae_int32)); + Q_tmp_left = AE_MULF32S_LH(P_input_center, P_coefficient_center_lfe); + + AE_L32_IP(P_input_lfe, input_lfe, 6 * sizeof(ae_int32)); + AE_MULAF32S_LL(Q_tmp_left, P_input_lfe, P_coefficient_center_lfe); + + Q_tmp_right = Q_tmp_left; + + AE_L32_IP(P_input_left, input_left, 6 * sizeof(ae_int32)); + AE_MULAF32S_LH(Q_tmp_left, P_input_left, P_coefficient_left_right); + + AE_L32_IP(P_input_right, input_right, 6 * sizeof(ae_int32)); + AE_MULAF32S_LL(Q_tmp_right, P_input_right, P_coefficient_left_right); + + AE_L32_IP(P_input_left_surround, input_left_surround, 6 * sizeof(ae_int32)); + AE_MULAF32S_LH(Q_tmp_left, P_input_left_surround, P_coefficient_left_s_right_s); + + AE_L32_IP(P_input_right_surround, input_right_surround, 6 * sizeof(ae_int32)); + AE_MULAF32S_LL(Q_tmp_right, P_input_right_surround, P_coefficient_left_s_right_s); + + P_output_left = AE_ROUND32F64SSYM(Q_tmp_left); + P_output_right = AE_ROUND32F64SSYM(Q_tmp_right); + + AE_S32_L_IP(P_output_left, output_left, 2 * sizeof(ae_int32)); + AE_S32_L_IP(P_output_right, output_right, 2 * sizeof(ae_int32)); + } +} + +void downmix32bit_7_1(struct up_down_mixer_data *cd, const uint8_t * const in_data, + const uint32_t in_size, uint8_t * const out_data) +{ + ae_int32x2 P_coefficient_left_right; + ae_int32x2 P_coefficient_left_s_right_s; + ae_int32x2 P_coefficient_center_lfe; + ae_int32x2 P_coefficient_left_S_right_S; + + /* Load the downmix coefficients. 
*/ + ae_int32x2 P_coefficient_left = + AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_LEFT << 2); + ae_int32x2 P_coefficient_center = + AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_CENTER << 2); + ae_int32x2 P_coefficient_right = + AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_RIGHT << 2); + ae_int32x2 P_coefficient_left_surround = + AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_LEFT_SURROUND << 2); + ae_int32x2 P_coefficient_right_surround = + AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_RIGHT_SURROUND << 2); + ae_int32x2 P_coefficient_lfe = + AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_LFE << 2); + ae_int32x2 P_coefficient_left_side = + AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_LEFT_SIDE << 2); + ae_int32x2 P_coefficient_right_side = + AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_RIGHT_SIDE << 2); + + /* + * We have 8 coefficients (constant inside loop), 8 channels and only 8 + * AE_P registers. But each of those registers is 48bit or 2x24bit wide. + * Also many Hifi2 operations can use either lower or higher 24 bits from + * those registers. By combining 8 coefficients in pairs we save 4 AE_P + * registers. + * NOTE(review): the register-width wording looks inherited from a HiFi2 + * version; the values packed below are ae_int32x2 - confirm. + */ + P_coefficient_left_right = AE_SEL32_LL(P_coefficient_left, P_coefficient_right); + P_coefficient_left_s_right_s = AE_SEL32_LL(P_coefficient_left_surround, + P_coefficient_right_surround); + P_coefficient_center_lfe = AE_SEL32_LL(P_coefficient_center, P_coefficient_lfe); + P_coefficient_left_S_right_S = AE_SEL32_LL(P_coefficient_left_side, + P_coefficient_right_side); + + /* Only load the channel if it's present.
*/ + const ae_int32 *input_left = (ae_int32 *)(in_data + + (get_channel_location(cd->in_channel_map, CHANNEL_LEFT) << 2)); + const ae_int32 *input_center = (ae_int32 *)(in_data + + (get_channel_location(cd->in_channel_map, CHANNEL_CENTER) << 2)); + const ae_int32 *input_right = (ae_int32 *)(in_data + + (get_channel_location(cd->in_channel_map, CHANNEL_RIGHT) << 2)); + const ae_int32 *input_left_surround = (ae_int32 *)(in_data + + (get_channel_location(cd->in_channel_map, CHANNEL_LEFT_SURROUND) << 2)); + const ae_int32 *input_right_surround = (ae_int32 *)(in_data + + (get_channel_location(cd->in_channel_map, CHANNEL_RIGHT_SURROUND) << 2)); + const ae_int32 *input_lfe = (ae_int32 *)(in_data + + (get_channel_location(cd->in_channel_map, CHANNEL_LFE) << 2)); + const ae_int32 *input_left_side = (ae_int32 *)(in_data + + (get_channel_location(cd->in_channel_map, CHANNEL_LEFT_SIDE) << 2)); + const ae_int32 *input_right_side = (ae_int32 *)(in_data + + (get_channel_location(cd->in_channel_map, CHANNEL_RIGHT_SIDE) << 2)); + + /* Downmixer single store. */ + ae_int32 *output_left = (ae_int32 *)(out_data); + ae_int32 *output_right = (ae_int32 *)(out_data + sizeof(ae_int32)); + + /* We will be using P & Q registers. */ + ae_int32x2 P_input_left; + ae_int32x2 P_input_center; + ae_int32x2 P_input_right; + ae_int32x2 P_input_left_surround; + ae_int32x2 P_input_right_surround; + ae_int32x2 P_input_lfe; + ae_int32x2 P_input_left_side; + ae_int32x2 P_input_right_side; + + ae_int32x2 P_output_left; + ae_int32x2 P_output_right; + + /* + * We don't need to initialize those registers in loop's body. + * Using non accumulating multiplication for the first left,rignt channels + * is more efficient. + * For non 5.1 version we cannot use this optimization, we don't know + * which channel is present and which multiplication should reset output + * accumulators. 
+ */ + ae_f64 Q_tmp_left = AE_ZERO64(); + ae_f64 Q_tmp_right = AE_ZERO64(); + + const ae_int32 *const end_input_left = input_left + (in_size / (sizeof(ae_int32))); + const char cs = 8 * sizeof(ae_int32); + /* cs is the offset passed to every AE_L32_XP below: 8 channels of ae_int32. */ + while (input_left < end_input_left) { + /* Load one 24-bit value, replicate in two elements of register P. */ + AE_L32_XP(P_input_center, input_center, cs); + Q_tmp_left = AE_MULF32S_LH(P_input_center, P_coefficient_center_lfe); + + AE_L32_XP(P_input_lfe, input_lfe, cs); + AE_MULAF32S_LL(Q_tmp_left, P_input_lfe, P_coefficient_center_lfe); + /* Center and LFE feed both outputs: seed the right accumulator from the left one. */ + Q_tmp_right = Q_tmp_left; + + AE_L32_XP(P_input_left, input_left, cs); + AE_MULAF32S_LH(Q_tmp_left, P_input_left, P_coefficient_left_right); + + AE_L32_XP(P_input_right, input_right, cs); + AE_MULAF32S_LL(Q_tmp_right, P_input_right, P_coefficient_left_right); + + AE_L32_XP(P_input_left_surround, input_left_surround, cs); + AE_MULAF32S_LH(Q_tmp_left, P_input_left_surround, P_coefficient_left_s_right_s); + + AE_L32_XP(P_input_right_surround, input_right_surround, cs); + AE_MULAF32S_LL(Q_tmp_right, P_input_right_surround, P_coefficient_left_s_right_s); + + AE_L32_XP(P_input_left_side, input_left_side, cs); + AE_MULAF32S_LH(Q_tmp_left, P_input_left_side, P_coefficient_left_S_right_S); + + AE_L32_XP(P_input_right_side, input_right_side, cs); + AE_MULAF32S_LL(Q_tmp_right, P_input_right_side, P_coefficient_left_S_right_S); + + P_output_left = AE_ROUND32F64SSYM(Q_tmp_left); + P_output_right = AE_ROUND32F64SSYM(Q_tmp_right); + + AE_S32_L_IP(P_output_left, output_left, 2 * sizeof(ae_int32)); + AE_S32_L_IP(P_output_right, output_right, 2 * sizeof(ae_int32)); + } +} +/* + * Copy 16-bit input samples to the output, widening each one to 32 bits and + * shifting it left by 16. in_size is in bytes (in_size >> 1 input samples); + * cd is not used. + * NOTE(review): each sample is stored through an ae_int32x2 pointer, which + * presumably writes it to two adjacent 32-bit slots (mono -> stereo) - confirm. + */ +void shiftcopy16bit_mono(struct up_down_mixer_data *cd, const uint8_t * const in_data, + const uint32_t in_size, uint8_t * const out_data) +{ + uint32_t i; + + ae_int16 *in_ptrs = (ae_int16 *)in_data; + ae_int32x2 *out_ptrs = (ae_int32x2 *)out_data; + + for (i = 0; i < (in_size >> 1); ++i) + out_ptrs[i] = AE_MOVINT32_FROMINT16(in_ptrs[i]) << 16; +} + +void 
shiftcopy16bit_stereo(struct up_down_mixer_data *cd, const uint8_t * const in_data, + const uint32_t in_size, uint8_t * const out_data) +{ + uint32_t i; + + ae_p16x2s *in_ptrs = (ae_p16x2s *)in_data; + ae_p24x2f *out_ptrs = (ae_p24x2f *)out_data; + ae_p24x2s in_regs = AE_ZEROP48(); + + for (i = 0; i < (in_size >> 2); ++i) { + in_regs = in_ptrs[i]; + AE_SP24X2F_X(in_regs, out_ptrs, i << 3); + } +} + +void downmix16bit(struct up_down_mixer_data *cd, const uint8_t * const in_data, + const uint32_t in_size, uint8_t * const out_data) +{ + uint32_t i; + + ae_int32x2 P_coefficient_left_right; + ae_int32x2 P_coefficient_left_s_right_s; + ae_int32x2 P_coefficient_center_lfe; + + /* Load the downmix coefficients. */ + ae_int32x2 P_coefficient_left = + AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_LEFT << 2); + ae_int32x2 P_coefficient_center = + AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_CENTER << 2); + ae_int32x2 P_coefficient_right = + AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_RIGHT << 2); + ae_int32x2 P_coefficient_left_surround = + AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_LEFT_SURROUND << 2); + ae_int32x2 P_coefficient_right_surround = + AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_RIGHT_SURROUND << 2); + ae_int32x2 P_coefficient_lfe = + AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_LFE << 2); + + /* + * We have 6 coefficients (constant inside loop), 6 channels and only 8 + * AE_P registers. But each of those registers is 48bit or 2x24bit wide. + * Also many Hifi2 operations can use either lower or higher 24 bits from + * those registers. By combining 6 coefficients in pairs we save 3 AE_P + * registers. 
+ */ + P_coefficient_left_right = AE_SEL32_LL(P_coefficient_left, P_coefficient_right); + P_coefficient_left_s_right_s = AE_SEL32_LL(P_coefficient_left_surround, + P_coefficient_right_surround); + P_coefficient_center_lfe = AE_SEL32_LL(P_coefficient_center, P_coefficient_lfe); + + /* See what channels are available. */ + bool left = (get_channel_location(cd->in_channel_map, CHANNEL_LEFT) != 0xF); + bool center = (get_channel_location(cd->in_channel_map, CHANNEL_CENTER) != 0xF); + bool right = (get_channel_location(cd->in_channel_map, CHANNEL_RIGHT) != 0xF); + bool left_surround = + (get_channel_location(cd->in_channel_map, CHANNEL_LEFT_SURROUND) != 0xF); + bool right_surround = + (get_channel_location(cd->in_channel_map, CHANNEL_RIGHT_SURROUND) != 0xF); + bool lfe = (get_channel_location(cd->in_channel_map, CHANNEL_LFE) != 0xF); + + /* Downmixer single load. */ + ae_p16s *input_left = NULL; + ae_p16s *input_center = NULL; + ae_p16s *input_right = NULL; + ae_p16s *input_left_surround = NULL; + ae_p16s *input_right_surround = NULL; + ae_p16s *input_lfe = NULL; + + /* Only load the channel if it's present. */ + if (left) { + input_left = (ae_p16s *)(in_data + + (get_channel_location(cd->in_channel_map, CHANNEL_LEFT) << 1)); + } + if (center) { + input_center = (ae_p16s *)(in_data + + (get_channel_location(cd->in_channel_map, CHANNEL_CENTER) << 1)); + } + if (right) { + input_right = (ae_p16s *)(in_data + + (get_channel_location(cd->in_channel_map, CHANNEL_RIGHT) << 1)); + } + if (left_surround) { + input_left_surround = (ae_p16s *)(in_data + + (get_channel_location(cd->in_channel_map, CHANNEL_LEFT_SURROUND) << 1)); + } + if (right_surround) { + input_right_surround = (ae_p16s *)(in_data + + (get_channel_location(cd->in_channel_map, CHANNEL_RIGHT_SURROUND) << 1)); + } + if (lfe) { + input_lfe = (ae_p16s *)(in_data + + (get_channel_location(cd->in_channel_map, CHANNEL_LFE) << 1)); + } + + /** Calculate number of samples in a single channel. 
*/ + uint32_t number_of_samples_in_one_channel = in_size / cd->in_channel_no; + + number_of_samples_in_one_channel >>= 1; + + /* Downmixer single store. */ + ae_int32 *output_left = (ae_int32 *)(out_data + 0); + ae_int32 *output_right = (ae_int32 *)(out_data + sizeof(ae_int32)); + + ae_int32x2 P_input_left; + ae_int32x2 P_input_center; + ae_int32x2 P_input_right; + ae_int32x2 P_input_left_surround; + ae_int32x2 P_input_right_surround; + ae_int32x2 P_input_lfe; + + ae_int32x2 P_output_left; + ae_int32x2 P_output_right; + + /* + * Calculating this outside of the loop is significant performance + * improvement. The value of this expression was reevaluated in each + * iteration when placed inside loop. + */ + int sample_offset = cd->in_channel_no << 1; + + for (i = 0; i < (number_of_samples_in_one_channel); i++) { + ae_f64 Q_tmp_left = AE_ZEROQ56(); + ae_f64 Q_tmp_right = AE_ZEROQ56(); + + /* Load one 24-bit value, replicate in two elements of register P. */ + if (left) { + P_input_left = AE_L16M_X(input_left, i * sample_offset); + AE_MULAF32S_LH(Q_tmp_left, P_input_left, P_coefficient_left_right); + } + if (center) { + P_input_center = AE_L16M_X(input_center, i * sample_offset); + AE_MULAF32S_LH(Q_tmp_left, P_input_center, P_coefficient_center_lfe); + AE_MULAF32S_LH(Q_tmp_right, P_input_center, P_coefficient_center_lfe); + } + if (right) { + P_input_right = AE_L16M_X(input_right, i * sample_offset); + AE_MULAF32S_LL(Q_tmp_right, P_input_right, P_coefficient_left_right); + } + if (left_surround) { + P_input_left_surround = AE_L16M_X(input_left_surround, i * sample_offset); + AE_MULAF32S_LH(Q_tmp_left, P_input_left_surround, + P_coefficient_left_s_right_s); + if (cd->in_channel_config == IPC4_CHANNEL_CONFIG_4_POINT_0) { + AE_MULAF32S_LH(Q_tmp_right, P_input_left_surround, + P_coefficient_left_s_right_s); + } + } + if (right_surround) { + P_input_right_surround = AE_L16M_X(input_right_surround, i * sample_offset); + AE_MULAF32S_LL(Q_tmp_right, P_input_right_surround, + 
P_coefficient_left_s_right_s); + } + if (lfe) { + P_input_lfe = AE_L16M_X(input_lfe, i * sample_offset); + AE_MULAF32S_LL(Q_tmp_left, P_input_lfe, P_coefficient_center_lfe); + AE_MULAF32S_LL(Q_tmp_right, P_input_lfe, P_coefficient_center_lfe); + } + + P_output_left = AE_ROUND32F64SSYM(Q_tmp_left); + P_output_right = AE_ROUND32F64SSYM(Q_tmp_right); + + output_left[i * 2] = AE_SLAI32(P_output_left, 2 * sizeof(ae_int32)); + output_right[i * 2] = AE_SLAI32(P_output_right, 2 * sizeof(ae_int32)); + } +} + +void downmix16bit_5_1(struct up_down_mixer_data *cd, const uint8_t * const in_data, + const uint32_t in_size, uint8_t * const out_data) +{ + uint32_t i; + + ae_int32x2 P_coefficient_left_right; + ae_int32x2 P_coefficient_left_s_right_s; + ae_int32x2 P_coefficient_center_lfe; + + /* Load the downmix coefficients. */ + ae_int32x2 P_coefficient_left = + AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_LEFT << 2); + ae_int32x2 P_coefficient_center = + AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_CENTER << 2); + ae_int32x2 P_coefficient_right = + AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_RIGHT << 2); + ae_int32x2 P_coefficient_left_surround = + AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_LEFT_SURROUND << 2); + ae_int32x2 P_coefficient_right_surround = + AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_RIGHT_SURROUND << 2); + ae_int32x2 P_coefficient_lfe = + AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_LFE << 2); + + P_coefficient_left_right = AE_SEL32_LL(P_coefficient_left, P_coefficient_right); + P_coefficient_left_s_right_s = AE_SEL32_LL(P_coefficient_left_surround, + P_coefficient_right_surround); + P_coefficient_center_lfe = AE_SEL32_LL(P_coefficient_center, P_coefficient_lfe); + + /* Downmixer single load. 
*/ + ae_p16s *input_left = NULL; + ae_p16s *input_center = NULL; + ae_p16s *input_right = NULL; + ae_p16s *input_left_surround = NULL; + ae_p16s *input_right_surround = NULL; + ae_p16s *input_lfe = NULL; + + /* + * Set up one read pointer per channel, unconditionally. + * NOTE(review): a channel missing from the map is not checked for here + * (downmix16bit compares against 0xF before use) - confirm callers + * guarantee a full 5.1 map. + */ + input_left = (ae_p16s *)(in_data + + (get_channel_location(cd->in_channel_map, CHANNEL_LEFT) << 1)); + input_center = (ae_p16s *)(in_data + + (get_channel_location(cd->in_channel_map, CHANNEL_CENTER) << 1)); + input_right = (ae_p16s *)(in_data + + (get_channel_location(cd->in_channel_map, CHANNEL_RIGHT) << 1)); + input_left_surround = (ae_p16s *)(in_data + + (get_channel_location(cd->in_channel_map, CHANNEL_LEFT_SURROUND) << 1)); + input_right_surround = (ae_p16s *)(in_data + + (get_channel_location(cd->in_channel_map, CHANNEL_RIGHT_SURROUND) << 1)); + input_lfe = (ae_p16s *)(in_data + + (get_channel_location(cd->in_channel_map, CHANNEL_LFE) << 1)); + + /* Samples per channel: in_size bytes / channel count, then / 2 bytes per sample. */ + uint32_t number_of_samples_in_one_channel = in_size / cd->in_channel_no; + + number_of_samples_in_one_channel >>= 1; + + /* Interleaved stereo output: right samples start one ae_int32 after left. */ + ae_int32 *output_left = (ae_int32 *)(out_data + 0); + ae_int32 *output_right = (ae_int32 *)(out_data + sizeof(ae_int32)); + + ae_int32x2 P_input_left; + ae_int32x2 P_input_center; + ae_int32x2 P_input_right; + ae_int32x2 P_input_left_surround; + ae_int32x2 P_input_right_surround; + ae_int32x2 P_input_lfe; + + ae_int32x2 P_output_left; + ae_int32x2 P_output_right; + + /* + * The accumulators do not need to be re-initialized in the loop body: + * the first multiplication of each iteration is non-accumulating and + * overwrites them, which is more efficient. + * The generic (non 5.1) version cannot use this optimization because it + * does not know which channel is present and therefore which + * multiplication should reset the output accumulators. + */ + ae_f64 Q_tmp_left = AE_ZERO64(); + ae_f64 Q_tmp_right = AE_ZERO64(); + + /* + * Calculating this outside of the loop is significant performance + * improvement. 
The value of this expression was reevaluated in each + * iteration when placed inside loop. + */ + int sample_offset = cd->in_channel_no << 1; + /* 0.011xSR(k) MIPS */ + for (i = 0; i < (number_of_samples_in_one_channel); i++) { + /* Load one 16-bit value, replicate in two elements of register P. */ + P_input_center = AE_L16M_X(input_center, i * sample_offset); + Q_tmp_left = AE_MULF32S_LH(P_input_center, P_coefficient_center_lfe); + + P_input_lfe = AE_L16M_X(input_lfe, i * sample_offset); + AE_MULAF32S_LL(Q_tmp_left, P_input_lfe, P_coefficient_center_lfe); + + Q_tmp_right = Q_tmp_left; + + P_input_left = AE_L16M_X(input_left, i * sample_offset); + AE_MULAF32S_LH(Q_tmp_left, P_input_left, P_coefficient_left_right); + + P_input_right = AE_L16M_X(input_right, i * sample_offset); + AE_MULAF32S_LL(Q_tmp_right, P_input_right, P_coefficient_left_right); + + P_input_left_surround = AE_L16M_X(input_left_surround, i * sample_offset); + AE_MULAF32S_LH(Q_tmp_left, P_input_left_surround, P_coefficient_left_s_right_s); + + P_input_right_surround = AE_L16M_X(input_right_surround, i * sample_offset); + AE_MULAF32S_LL(Q_tmp_right, P_input_right_surround, P_coefficient_left_s_right_s); + + P_output_left = AE_ROUND32F64SSYM(Q_tmp_left); + P_output_right = AE_ROUND32F64SSYM(Q_tmp_right); + + output_left[i * 2] = AE_SLAI32(P_output_left, 2 * sizeof(ae_int32)); + output_right[i * 2] = AE_SLAI32(P_output_right, 2 * sizeof(ae_int32)); + } +} + +void downmix16bit_4ch_mono(struct up_down_mixer_data *cd, const uint8_t * const in_data, + const uint32_t in_size, uint8_t * const out_data) +{ + size_t i; + + uint32_t idx1 = get_channel_index(cd->in_channel_map, 0); + uint32_t idx2 = get_channel_index(cd->in_channel_map, 1); + uint32_t idx3 = get_channel_index(cd->in_channel_map, 2); + uint32_t idx4 = get_channel_index(cd->in_channel_map, 3); + + uint16_t coeffs[4] = {cd->downmix_coefficients[idx1], + cd->downmix_coefficients[idx2], + cd->downmix_coefficients[idx3], + 
cd->downmix_coefficients[idx4] + }; + + ae_int16x4 coeff = AE_L16X4_X((ae_int16x4 *)coeffs, 0); + + const ae_int16x4 *input_data = (const ae_int16x4 *)in_data; + ae_int16 *output_data = (ae_int16 *)out_data; + + ae_int16x4 P_input; + ae_int16x4 P_output; + + const uint32_t channel_no = 4; + + for (i = 0; i < in_size / (sizeof(int16_t)); i += channel_no) { + AE_L16X4_XP(P_input, input_data, sizeof(ae_int16) * channel_no); + ae_f32x2 Q_tmp = AE_MULF16SS_00(P_input, coeff); + + AE_MULAF16SS_11(Q_tmp, P_input, coeff); + AE_MULAF16SS_22(Q_tmp, P_input, coeff); + AE_MULAF16SS_33(Q_tmp, P_input, coeff); + + P_output = AE_ROUND16X4F32SSYM(Q_tmp, Q_tmp); + AE_S16_0_IP(P_output, output_data, sizeof(ae_int16)); + } +} + +void downmix32bit_stereo(struct up_down_mixer_data *cd, const uint8_t * const in_data, + const uint32_t in_size, uint8_t * const out_data) +{ + uint32_t i; + + uint32_t downmix_coefficient = 1073741568; + + /* Load the downmix coefficients. */ + ae_int32x2 P_coefficient_left_right = AE_L32_X((ae_int32 *)&downmix_coefficient, 0); + + ae_int32x2 P_input_left; + ae_int32x2 P_input_right; + ae_int32x2 P_output; + + const ae_int32 *input_left = (ae_int32 *)in_data; + const ae_int32 *input_right = input_left + 1; + + ae_int32 *output = (ae_int32 *)(out_data); + + for (i = 0; i < (in_size >> 3); ++i) { + P_input_left = AE_L32_X(input_left, i * 2 * sizeof(ae_int32)); + ae_f64 Q_tmp = AE_MULF32S_LH(P_input_left, P_coefficient_left_right); + + P_input_right = AE_L32_X(input_right, i * 2 * sizeof(ae_int32)); + AE_MULAF32S_LL(Q_tmp, P_input_right, P_coefficient_left_right); + + P_output = AE_ROUND32F64SSYM(Q_tmp); + + output[i] = P_output; + } +} + +void downmix16bit_stereo(struct up_down_mixer_data *cd, const uint8_t * const in_data, + const uint32_t in_size, uint8_t * const out_data) +{ + size_t idx; + + /* TODO: optimize using Hifi3; PCM samples are signed, so halve them as int16_t */ + const int16_t *in_data16 = (const int16_t *)in_data; + int16_t *out_data16 = (int16_t *)out_data; + + for (idx = 0; idx < (in_size / 
4); ++idx) + out_data16[idx] = (in_data16[2 * idx] / 2) + (in_data16[2 * idx + 1] / 2); +} + +void downmix32bit_3_1_mono(struct up_down_mixer_data *cd, const uint8_t * const in_data, + const uint32_t in_size, uint8_t * const out_data) +{ + size_t i; + + ae_int32x2 P_coefficient_left_right; + ae_int32x2 P_coefficient_center_lfe; + + ae_int32x2 P_coefficient_left = + AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_LEFT << 2); + ae_int32x2 P_coefficient_center = + AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_CENTER << 2); + ae_int32x2 P_coefficient_right = + AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_RIGHT << 2); + ae_int32x2 P_coefficient_lfe = + AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_LFE << 2); + + P_coefficient_left_right = AE_SEL32_LL(P_coefficient_left, P_coefficient_right); + P_coefficient_center_lfe = AE_SEL32_LL(P_coefficient_center, P_coefficient_lfe); + + const ae_int32 *input_left = (ae_int32 *)(in_data + + (get_channel_location(cd->in_channel_map, CHANNEL_LEFT) << 2)); + const ae_int32 *input_center = (ae_int32 *)(in_data + + (get_channel_location(cd->in_channel_map, CHANNEL_CENTER) << 2)); + const ae_int32 *input_right = (ae_int32 *)(in_data + + (get_channel_location(cd->in_channel_map, CHANNEL_RIGHT) << 2)); + const ae_int32 *input_lfe = (ae_int32 *)(in_data + + (get_channel_location(cd->in_channel_map, CHANNEL_LFE) << 2)); + + ae_int32 *output = (ae_int32 *)(out_data); + + ae_int32x2 P_input_left; + ae_int32x2 P_input_center; + ae_int32x2 P_input_right; + ae_int32x2 P_input_lfe; + + ae_int32x2 P_output; + + const uint32_t channel_no = 4; + + for (i = 0; i < in_size / (sizeof(ae_int32)); i += channel_no) { + ae_f64 Q_tmp; + + AE_L32_IP(P_input_left, input_left, 4 * sizeof(ae_int32)); + Q_tmp = AE_MULF32S_LH(P_input_left, P_coefficient_left_right); + + AE_L32_IP(P_input_center, input_center, 4 * sizeof(ae_int32)); + AE_MULAF32S_LH(Q_tmp, P_input_center, P_coefficient_center_lfe); + + AE_L32_IP(P_input_right, 
input_right, 4 * sizeof(ae_int32)); + AE_MULAF32S_LL(Q_tmp, P_input_right, P_coefficient_left_right); + + AE_L32_IP(P_input_lfe, input_lfe, 4 * sizeof(ae_int32)); + AE_MULAF32S_LL(Q_tmp, P_input_lfe, P_coefficient_center_lfe); + + P_output = AE_ROUND32F64SSYM(Q_tmp); + + AE_S32_L_IP(P_output, output, sizeof(ae_int32)); + } +} + +void downmix32bit_4_0_mono(struct up_down_mixer_data *cd, const uint8_t * const in_data, + const uint32_t in_size, uint8_t * const out_data) +{ + size_t i; + + ae_int32x2 P_coefficient_left_right; + ae_int32x2 P_coefficient_center_cs; + + ae_int32x2 P_coefficient_left = + AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_LEFT << 2); + ae_int32x2 P_coefficient_center = + AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_CENTER << 2); + ae_int32x2 P_coefficient_right = + AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_RIGHT << 2); + ae_int32x2 P_coefficient_cs = + AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_CENTER_SURROUND << 2); + + P_coefficient_left_right = AE_SEL32_LL(P_coefficient_left, P_coefficient_right); + P_coefficient_center_cs = AE_SEL32_LL(P_coefficient_center, P_coefficient_cs); + + const ae_int32 *input_left = (ae_int32 *)(in_data + + (get_channel_location(cd->in_channel_map, CHANNEL_LEFT) << 2)); + const ae_int32 *input_center = (ae_int32 *)(in_data + + (get_channel_location(cd->in_channel_map, CHANNEL_CENTER) << 2)); + const ae_int32 *input_right = (ae_int32 *)(in_data + + (get_channel_location(cd->in_channel_map, CHANNEL_RIGHT) << 2)); + const ae_int32 *input_cs = (ae_int32 *)(in_data + + (get_channel_location(cd->in_channel_map, CHANNEL_CENTER_SURROUND) << 2)); + + ae_int32 *output = (ae_int32 *)(out_data); + + ae_int32x2 P_input_left; + ae_int32x2 P_input_center; + ae_int32x2 P_input_right; + ae_int32x2 P_input_cs; + + ae_int32x2 P_output; + + const uint32_t channel_no = 4; + + for (i = 0; i < in_size / (sizeof(ae_int32)); i += channel_no) { + ae_f64 Q_tmp; + + AE_L32_IP(P_input_left, 
input_left, 4 * sizeof(ae_int32)); + Q_tmp = AE_MULF32S_LH(P_input_left, P_coefficient_left_right); + + AE_L32_IP(P_input_center, input_center, 4 * sizeof(ae_int32)); + AE_MULAF32S_LH(Q_tmp, P_input_center, P_coefficient_center_cs); + + AE_L32_IP(P_input_right, input_right, 4 * sizeof(ae_int32)); + AE_MULAF32S_LL(Q_tmp, P_input_right, P_coefficient_left_right); + + AE_L32_IP(P_input_cs, input_cs, 4 * sizeof(ae_int32)); + AE_MULAF32S_LL(Q_tmp, P_input_cs, P_coefficient_center_cs); + + P_output = AE_ROUND32F64SSYM(Q_tmp); + + AE_S32_L_IP(P_output, output, sizeof(ae_int32)); + } +} + +void downmix32bit_quatro_mono(struct up_down_mixer_data *cd, const uint8_t * const in_data, + const uint32_t in_size, uint8_t * const out_data) +{ + size_t i; + + ae_int32x2 P_coefficient_left_right; + ae_int32x2 P_coefficient_ls_rs; + + ae_int32x2 P_coefficient_left = + AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_LEFT << 2); + ae_int32x2 P_coefficient_ls = + AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_LEFT_SURROUND << 2); + ae_int32x2 P_coefficient_right = + AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_RIGHT << 2); + ae_int32x2 P_coefficient_rs = + AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_RIGHT_SURROUND << 2); + + P_coefficient_left_right = AE_SEL32_LL(P_coefficient_left, P_coefficient_right); + P_coefficient_ls_rs = AE_SEL32_LL(P_coefficient_ls, P_coefficient_rs); + + const ae_int32 *input_left = (ae_int32 *)(in_data + + (get_channel_location(cd->in_channel_map, CHANNEL_LEFT) << 2)); + const ae_int32 *input_ls = (ae_int32 *)(in_data + + (get_channel_location(cd->in_channel_map, CHANNEL_LEFT_SURROUND) << 2)); + const ae_int32 *input_right = (ae_int32 *)(in_data + + (get_channel_location(cd->in_channel_map, CHANNEL_RIGHT) << 2)); + const ae_int32 *input_rs = (ae_int32 *)(in_data + + (get_channel_location(cd->in_channel_map, CHANNEL_RIGHT_SURROUND) << 2)); + + ae_int32 *output = (ae_int32 *)(out_data); + + ae_int32x2 P_input_left; + ae_int32x2 
P_input_ls; + ae_int32x2 P_input_right; + ae_int32x2 P_input_rs; + + ae_int32x2 P_output; + + const uint32_t channel_no = 4; + + for (i = 0; i < in_size / (sizeof(ae_int32)); i += channel_no) { + ae_f64 Q_tmp; + + AE_L32_IP(P_input_left, input_left, 4 * sizeof(ae_int32)); + Q_tmp = AE_MULF32S_LH(P_input_left, P_coefficient_left_right); + + AE_L32_IP(P_input_ls, input_ls, 4 * sizeof(ae_int32)); + AE_MULAF32S_LH(Q_tmp, P_input_ls, P_coefficient_ls_rs); + + AE_L32_IP(P_input_right, input_right, 4 * sizeof(ae_int32)); + AE_MULAF32S_LL(Q_tmp, P_input_right, P_coefficient_left_right); + + AE_L32_IP(P_input_rs, input_rs, 4 * sizeof(ae_int32)); + AE_MULAF32S_LL(Q_tmp, P_input_rs, P_coefficient_ls_rs); + + P_output = AE_ROUND32F64SSYM(Q_tmp); + + AE_S32_L_IP(P_output, output, sizeof(ae_int32)); + } +} + +void downmix32bit_5_1_mono(struct up_down_mixer_data *cd, const uint8_t * const in_data, + const uint32_t in_size, uint8_t * const out_data) +{ + size_t i; + + ae_int32x2 P_coefficient_left_right; + ae_int32x2 P_coefficient_center_cs; + + ae_int32x2 P_coefficient_left = + AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_LEFT << 2); + ae_int32x2 P_coefficient_center = + AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_CENTER << 2); + ae_int32x2 P_coefficient_right = + AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_RIGHT << 2); + ae_int32x2 P_coefficient_cs = + AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_CENTER_SURROUND << 2); + + P_coefficient_left_right = AE_SEL32_LL(P_coefficient_left, P_coefficient_right); + P_coefficient_center_cs = AE_SEL32_LL(P_coefficient_center, P_coefficient_cs); + + const ae_int32 *input_left = (ae_int32 *)(in_data + + (get_channel_location(cd->in_channel_map, CHANNEL_LEFT) << 2)); + const ae_int32 *input_center = (ae_int32 *)(in_data + + (get_channel_location(cd->in_channel_map, CHANNEL_CENTER) << 2)); + const ae_int32 *input_right = (ae_int32 *)(in_data + + (get_channel_location(cd->in_channel_map, CHANNEL_RIGHT) << 
2)); + const ae_int32 *input_cs = (ae_int32 *)(in_data + + (get_channel_location(cd->in_channel_map, CHANNEL_CENTER_SURROUND) << 2)); + + ae_int32 *output = (ae_int32 *)(out_data); + + ae_int32x2 P_input_left; + ae_int32x2 P_input_center; + ae_int32x2 P_input_right; + ae_int32x2 P_input_cs; + + ae_int32x2 P_output; + + const uint32_t channel_no = 6; + + for (i = 0; i < in_size / (sizeof(ae_int32)); i += channel_no) { + ae_f64 Q_tmp; + + AE_L32_IP(P_input_left, input_left, 6 * sizeof(ae_int32)); + Q_tmp = AE_MULF32S_LH(P_input_left, P_coefficient_left_right); + + AE_L32_IP(P_input_center, input_center, 6 * sizeof(ae_int32)); + AE_MULAF32S_LH(Q_tmp, P_input_center, P_coefficient_center_cs); + + AE_L32_IP(P_input_right, input_right, 6 * sizeof(ae_int32)); + AE_MULAF32S_LL(Q_tmp, P_input_right, P_coefficient_left_right); + + AE_L32_IP(P_input_cs, input_cs, 6 * sizeof(ae_int32)); + AE_MULAF32S_LL(Q_tmp, P_input_cs, P_coefficient_center_cs); + + P_output = AE_ROUND32F64SSYM(Q_tmp); + + AE_S32_L_IP(P_output, output, sizeof(ae_int32)); + } +} + +void downmix32bit_7_1_mono(struct up_down_mixer_data *cd, const uint8_t * const in_data, + const uint32_t in_size, uint8_t * const out_data) +{ + size_t i; + + ae_int32x2 P_coefficient_left_right; + ae_int32x2 P_coefficient_center_cs; + + ae_int32x2 P_coefficient_left = + AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_LEFT << 2); + ae_int32x2 P_coefficient_center = + AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_CENTER << 2); + ae_int32x2 P_coefficient_right = + AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_RIGHT << 2); + ae_int32x2 P_coefficient_cs = + AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_CENTER_SURROUND << 2); + + P_coefficient_left_right = AE_SEL32_LL(P_coefficient_left, P_coefficient_right); + P_coefficient_center_cs = AE_SEL32_LL(P_coefficient_center, P_coefficient_cs); + + const ae_int32 *input_left = (ae_int32 *)(in_data + + (get_channel_location(cd->in_channel_map, CHANNEL_LEFT) 
<< 2)); + const ae_int32 *input_center = (ae_int32 *)(in_data + + (get_channel_location(cd->in_channel_map, CHANNEL_CENTER) << 2)); + const ae_int32 *input_right = (ae_int32 *)(in_data + + (get_channel_location(cd->in_channel_map, CHANNEL_RIGHT) << 2)); + const ae_int32 *input_cs = (ae_int32 *)(in_data + + (get_channel_location(cd->in_channel_map, CHANNEL_CENTER_SURROUND) << 2)); + + ae_int32 *output = (ae_int32 *)(out_data); + + ae_int32x2 P_input_left; + ae_int32x2 P_input_center; + ae_int32x2 P_input_right; + ae_int32x2 P_input_cs; + + ae_int32x2 P_output; + + const uint32_t channel_no = 8; + const size_t offset = sizeof(ae_int32) * channel_no; + + for (i = 0; i < in_size / (sizeof(ae_int32)); i += channel_no) { + ae_f64 Q_tmp; + + AE_L32_XP(P_input_left, input_left, offset); + Q_tmp = AE_MULF32S_LH(P_input_left, P_coefficient_left_right); + + AE_L32_XP(P_input_center, input_center, offset); + AE_MULAF32S_LH(Q_tmp, P_input_center, P_coefficient_center_cs); + + AE_L32_XP(P_input_right, input_right, offset); + AE_MULAF32S_LL(Q_tmp, P_input_right, P_coefficient_left_right); + + AE_L32_XP(P_input_cs, input_cs, offset); + AE_MULAF32S_LL(Q_tmp, P_input_cs, P_coefficient_center_cs); + + P_output = AE_ROUND32F64SSYM(Q_tmp); + + AE_S32_L_IP(P_output, output, sizeof(ae_int32)); + } +} + +void downmix32bit_7_1_to_5_1(struct up_down_mixer_data *cd, const uint8_t * const in_data, + const uint32_t in_size, uint8_t * const out_data) +{ + uint32_t i; + + channel_map out_channel_map = cd->out_channel_map; + const uint8_t left_slot = get_channel_location(out_channel_map, CHANNEL_LEFT); + const uint8_t center_slot = get_channel_location(out_channel_map, CHANNEL_CENTER); + const uint8_t right_slot = get_channel_location(out_channel_map, CHANNEL_RIGHT); + uint8_t right_surround_slot = get_channel_location(out_channel_map, CHANNEL_RIGHT_SURROUND); + uint8_t left_surround_slot = get_channel_location(out_channel_map, CHANNEL_LEFT_SURROUND); + const uint8_t lfe_slot = 
get_channel_location(out_channel_map, CHANNEL_LFE); + + /* Must support also 5.1 Surround */ + const bool surround_5_1_channel_map = (left_surround_slot == CHANNEL_INVALID) && + (right_surround_slot == CHANNEL_INVALID); + + if (surround_5_1_channel_map) { + left_surround_slot = get_channel_location(out_channel_map, CHANNEL_LEFT_SIDE); + right_surround_slot = get_channel_location(out_channel_map, CHANNEL_RIGHT_SIDE); + } + + ae_int32 *output_left_ptr = (ae_int32 *)(out_data + (left_slot << 2)); + ae_int32 *output_center_ptr = (ae_int32 *)(out_data + (center_slot << 2)); + ae_int32 *output_right_ptr = (ae_int32 *)(out_data + (right_slot << 2)); + ae_int32 *output_side_left_ptr = (ae_int32 *)(out_data + (left_surround_slot << 2)); + ae_int32 *output_side_right_ptr = (ae_int32 *)(out_data + (right_surround_slot << 2)); + ae_int32 *output_lfe_ptr = (ae_int32 *)(out_data + (lfe_slot << 2)); + + ae_int32 *in_left_ptr = (ae_int32 *)in_data; + ae_int32 *in_center_ptr = (ae_int32 *)(in_data + 4); + ae_int32 *in_right_ptr = (ae_int32 *)(in_data + 8); + ae_int32 *in_lfe_ptr = (ae_int32 *)(in_data + 20); + + for (i = 0; i < (in_size >> 5); ++i) { + output_left_ptr[i * 6] = in_left_ptr[i * 8]; + output_right_ptr[i * 6] = in_right_ptr[i * 8]; + output_center_ptr[i * 6] = in_center_ptr[i * 8]; + output_lfe_ptr[i * 6] = in_lfe_ptr[i * 8]; + } + + ae_int32x2 P_coefficient_left_right; + ae_int32x2 P_coefficient_left_s_right_s; + ae_int32x2 P_coefficient_center_lfe; + ae_int32x2 P_coefficient_left_S_right_S; + + /* Load the downmix coefficients. 
*/ + ae_int32x2 P_coefficient_left = + AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_LEFT << 2); + ae_int32x2 P_coefficient_center = + AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_CENTER << 2); + ae_int32x2 P_coefficient_right = + AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_RIGHT << 2); + ae_int32x2 P_coefficient_left_surround = + AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_LEFT_SURROUND << 2); + ae_int32x2 P_coefficient_right_surround = + AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_RIGHT_SURROUND << 2); + ae_int32x2 P_coefficient_lfe = + AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_LFE << 2); + ae_int32x2 P_coefficient_left_side = + AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_LEFT_SIDE << 2); + ae_int32x2 P_coefficient_right_side = + AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_RIGHT_SIDE << 2); + + /* + * We have 6 coefficients (constant inside loop), 6 channels and only 8 + * AE_P registers. But each of those registers is 48bit or 2x24bit wide. + * Also many Hifi2 operations can use either lower or higher 24 bits from + * those registers. By combining 6 coefficients in pairs we save 3 AE_P + * registers. 
+ */ + P_coefficient_left_right = AE_SEL32_LL(P_coefficient_left, P_coefficient_right); + P_coefficient_left_s_right_s = AE_SEL32_LL(P_coefficient_left_surround, + P_coefficient_right_surround); + P_coefficient_center_lfe = AE_SEL32_LL(P_coefficient_center, P_coefficient_lfe); + P_coefficient_left_S_right_S = AE_SEL32_LL(P_coefficient_left_side, + P_coefficient_right_side); + + const ae_int32 *input_left_surround = (ae_int32 *)(in_data + + (get_channel_location(cd->in_channel_map, CHANNEL_LEFT_SURROUND) << 2)); + const ae_int32 *input_right_surround = (ae_int32 *)(in_data + + (get_channel_location(cd->in_channel_map, CHANNEL_RIGHT_SURROUND) << 2)); + const ae_int32 *input_left_side = (ae_int32 *)(in_data + + (get_channel_location(cd->in_channel_map, CHANNEL_LEFT_SIDE) << 2)); + const ae_int32 *input_right_side = (ae_int32 *)(in_data + + (get_channel_location(cd->in_channel_map, CHANNEL_RIGHT_SIDE) << 2)); + + /* We will be using P & Q registers. */ + ae_int32x2 P_input_left_surround; + ae_int32x2 P_input_right_surround; + ae_int32x2 P_input_left_side; + ae_int32x2 P_input_right_side; + + ae_int32x2 P_output_left; + ae_int32x2 P_output_right; + + const ae_int32 *const end_input_left = input_left_surround + (in_size / (sizeof(ae_int32))); + const char cs = 8 * sizeof(ae_int32); + + while (input_left_surround < end_input_left) { + ae_f64 Q_tmp_right_side; + ae_f64 Q_tmp_left_side; + + /* Load one 24-bit value, replicate in two elements of register P. 
*/ + AE_L32_XP(P_input_left_surround, input_left_surround, cs); + Q_tmp_left_side = AE_MULF32S_LH(P_input_left_surround, P_coefficient_left_right); + Q_tmp_right_side = AE_MULF32S_LL(P_input_left_surround, P_coefficient_left_S_right_S); + + AE_L32_XP(P_input_right_surround, input_right_surround, cs); + AE_MULAF32S_LH(Q_tmp_left_side, P_input_right_surround, P_coefficient_left_S_right_S); + AE_MULAF32S_LL(Q_tmp_right_side, P_input_right_surround, P_coefficient_left_right); + + AE_L32_XP(P_input_left_side, input_left_side, cs); + AE_MULAF32S_LH(Q_tmp_left_side, P_input_left_side, P_coefficient_left_right); + + AE_L32_XP(P_input_right_side, input_right_side, cs); + AE_MULAF32S_LL(Q_tmp_right_side, P_input_right_side, P_coefficient_left_right); + + P_output_left = AE_ROUND32F64SSYM(Q_tmp_left_side); + P_output_right = AE_ROUND32F64SSYM(Q_tmp_right_side); + + AE_S32_L_IP(P_output_left, output_side_left_ptr, 6 * sizeof(ae_int32)); + AE_S32_L_IP(P_output_right, output_side_right_ptr, 6 * sizeof(ae_int32)); + } +} + +void upmix32bit_4_0_to_5_1(struct up_down_mixer_data *cd, const uint8_t * const in_data, + const uint32_t in_size, uint8_t * const out_data) +{ + uint32_t i; + + channel_map out_channel_map = cd->out_channel_map; + + const uint8_t left_slot = get_channel_location(out_channel_map, CHANNEL_LEFT); + const uint8_t center_slot = get_channel_location(out_channel_map, CHANNEL_CENTER); + const uint8_t right_slot = get_channel_location(out_channel_map, CHANNEL_RIGHT); + uint8_t right_surround_slot = get_channel_location(out_channel_map, CHANNEL_RIGHT_SURROUND); + uint8_t left_surround_slot = get_channel_location(out_channel_map, CHANNEL_LEFT_SURROUND); + const uint8_t lfe_slot = get_channel_location(out_channel_map, CHANNEL_LFE); + + /* Must support also 5.1 Surround */ + const bool surround_5_1_channel_map = (left_surround_slot == CHANNEL_INVALID) && + (right_surround_slot == CHANNEL_INVALID); + + if (surround_5_1_channel_map) { + left_surround_slot = 
get_channel_location(out_channel_map, CHANNEL_LEFT_SIDE); + right_surround_slot = get_channel_location(out_channel_map, CHANNEL_RIGHT_SIDE); + } + + ae_int32 *output_left = (ae_int32 *)(out_data + (left_slot << 2)); + ae_int32 *output_center = (ae_int32 *)(out_data + (center_slot << 2)); + ae_int32 *output_right = (ae_int32 *)(out_data + (right_slot << 2)); + ae_int32 *output_side_left = (ae_int32 *)(out_data + (left_surround_slot << 2)); + ae_int32 *output_side_right = (ae_int32 *)(out_data + (right_surround_slot << 2)); + ae_int32 *output_lfe = (ae_int32 *)(out_data + (lfe_slot << 2)); + + ae_int32 *in_left_ptr = (ae_int32 *)in_data; + ae_int32 *in_center_ptr = (ae_int32 *)(in_data + 4); + ae_int32 *in_right_ptr = (ae_int32 *)(in_data + 8); + + for (i = 0; i < (in_size >> 4); ++i) { + output_left[i * 6] = in_left_ptr[i * 4]; + output_right[i * 6] = in_right_ptr[i * 4]; + output_center[i * 6] = in_center_ptr[i * 4]; + output_lfe[i * 6] = 0; + } + + ae_int32x2 P_coefficient_left_right; + ae_int32x2 P_coefficient_left_s_right_s; + ae_int32x2 P_coefficient_center_lfe; + ae_int32x2 P_coefficient_left_S_right_S; + + /* Load the downmix coefficients. 
*/ + ae_int32x2 P_coefficient_left = + AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_LEFT << 2); + ae_int32x2 P_coefficient_center = + AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_CENTER << 2); + ae_int32x2 P_coefficient_right = + AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_RIGHT << 2); + ae_int32x2 P_coefficient_left_surround = + AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_LEFT_SURROUND << 2); + ae_int32x2 P_coefficient_right_surround = + AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_RIGHT_SURROUND << 2); + ae_int32x2 P_coefficient_lfe = + AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_LFE << 2); + ae_int32x2 P_coefficient_left_side = + AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_LEFT_SIDE << 2); + ae_int32x2 P_coefficient_right_side = + AE_L32_X((ae_int32 *)cd->downmix_coefficients, CHANNEL_RIGHT_SIDE << 2); + + /* + * We have 6 coefficients (constant inside loop), 6 channels and only 8 + * AE_P registers. But each of those registers is 48bit or 2x24bit wide. + * Also many Hifi2 operations can use either lower or higher 24 bits from + * those registers. By combining 6 coefficients in pairs we save 3 AE_P + * registers. + */ + P_coefficient_left_right = AE_SEL32_LL(P_coefficient_left, P_coefficient_right); + P_coefficient_left_s_right_s = AE_SEL32_LL(P_coefficient_left_surround, + P_coefficient_right_surround); + P_coefficient_center_lfe = AE_SEL32_LL(P_coefficient_center, P_coefficient_lfe); + P_coefficient_left_S_right_S = AE_SEL32_LL(P_coefficient_left_side, + P_coefficient_right_side); + + const ae_int32 *input_center_surround = (ae_int32 *)(in_data + + (get_channel_location(cd->in_channel_map, CHANNEL_CENTER_SURROUND) << 2)); + + /* We will be using P & Q registers. */ + ae_int32x2 P_input_center_surround; + + ae_int32x2 P_output_left_side; + ae_int32x2 P_output_right_side; + + /* + * We don't need to initialize those registers in loop's body. 
+ * Using non accumulating multiplication for the first left,rignt channels + * is more efficient. + * For non 5.1 version we cannot use this optimization, we don't know + * which channel is present and which multiplication should reset output + * accumulators. + */ + ae_f64 Q_tmp_right_side = AE_ZERO64(); + ae_f64 Q_tmp_left_side = AE_ZERO64(); + + const ae_int32 *const end_input_left = input_center_surround + (in_size / (sizeof(ae_int32))); + const char cs = 4 * sizeof(ae_int32); + + while (input_center_surround < end_input_left) { + /* Load one 24-bit value, replicate in two elements of register P. */ + AE_L32_XP(P_input_center_surround, input_center_surround, cs); + Q_tmp_left_side = AE_MULF32S_LH(P_input_center_surround, + P_coefficient_left_s_right_s); + Q_tmp_right_side = AE_MULF32S_LL(P_input_center_surround, + P_coefficient_left_s_right_s); + + P_output_left_side = AE_ROUND32F64SSYM(Q_tmp_left_side); + P_output_right_side = AE_ROUND32F64SSYM(Q_tmp_right_side); + + AE_S32_L_IP(P_output_left_side, output_side_left, 6 * sizeof(ae_int32)); + AE_S32_L_IP(P_output_right_side, output_side_right, 6 * sizeof(ae_int32)); + } +} + +void upmix32bit_quatro_to_5_1(struct up_down_mixer_data *cd, const uint8_t * const in_data, + const uint32_t in_size, uint8_t * const out_data) +{ + uint32_t i; + + channel_map out_channel_map = cd->out_channel_map; + + const uint8_t left_slot = get_channel_location(out_channel_map, CHANNEL_LEFT); + const uint8_t center_slot = get_channel_location(out_channel_map, CHANNEL_CENTER); + const uint8_t right_slot = get_channel_location(out_channel_map, CHANNEL_RIGHT); + uint8_t right_surround_slot = get_channel_location(out_channel_map, CHANNEL_RIGHT_SURROUND); + uint8_t left_surround_slot = get_channel_location(out_channel_map, CHANNEL_LEFT_SURROUND); + const uint8_t lfe_slot = get_channel_location(out_channel_map, CHANNEL_LFE); + + /* Must support also 5.1 Surround */ + const bool surround_5_1_channel_map = (left_surround_slot == 
CHANNEL_INVALID) && + (right_surround_slot == CHANNEL_INVALID); + + if (surround_5_1_channel_map) { /* output map carries side channels instead of surrounds */ + left_surround_slot = get_channel_location(out_channel_map, CHANNEL_LEFT_SIDE); + right_surround_slot = get_channel_location(out_channel_map, CHANNEL_RIGHT_SIDE); + } + + ae_int32 *output_left = (ae_int32 *)(out_data + (left_slot << 2)); + ae_int32 *output_center = (ae_int32 *)(out_data + (center_slot << 2)); + ae_int32 *output_right = (ae_int32 *)(out_data + (right_slot << 2)); + ae_int32 *output_side_left = (ae_int32 *)(out_data + (left_surround_slot << 2)); + ae_int32 *output_side_right = (ae_int32 *)(out_data + (right_surround_slot << 2)); + ae_int32 *output_lfe = (ae_int32 *)(out_data + (lfe_slot << 2)); + + ae_int32 *in_left_ptr = (ae_int32 *)in_data; + ae_int32 *in_right_ptr = (ae_int32 *)(in_data + 4); + ae_int32 *in_left_surround_ptr = (ae_int32 *)(in_data + 8); + ae_int32 *in_right_surround_ptr = (ae_int32 *)(in_data + 12); + + for (i = 0; i < (in_size >> 4); ++i) { + output_left[i * 6] = in_left_ptr[i * 4]; + output_right[i * 6] = in_right_ptr[i * 4]; + output_center[i * 6] = 0; + output_side_left[i * 6] = in_left_surround_ptr[i * 4]; + output_side_right[i * 6] = in_right_surround_ptr[i * 4]; + output_lfe[i * 6] = 0; + } +} + +#else + #error "Only hifi3 version supported." 
+#endif diff --git a/src/include/ipc4/base-config.h b/src/include/ipc4/base-config.h index cfe1b1829424..de54d6bb6d10 100644 --- a/src/include/ipc4/base-config.h +++ b/src/include/ipc4/base-config.h @@ -74,6 +74,19 @@ enum ipc4_channel_config { IPC4_CHANNEL_CONFIG_INVALID }; +enum ipc4_channel_index { + CHANNEL_LEFT = 0, + CHANNEL_CENTER = 1, + CHANNEL_RIGHT = 2, + CHANNEL_LEFT_SURROUND = 3, + CHANNEL_CENTER_SURROUND = 3, + CHANNEL_RIGHT_SURROUND = 4, + CHANNEL_LEFT_SIDE = 5, + CHANNEL_RIGHT_SIDE = 6, + CHANNEL_LFE = 7, + CHANNEL_INVALID = 0xF, +}; + enum ipc4_interleaved_style { IPC4_CHANNELS_INTERLEAVED = 0, IPC4_CHANNELS_NONINTERLEAVED = 1, diff --git a/src/include/ipc4/up_down_mixer.h b/src/include/ipc4/up_down_mixer.h new file mode 100644 index 000000000000..a4e2246016bd --- /dev/null +++ b/src/include/ipc4/up_down_mixer.h @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: BSD-3-Clause +// +// Copyright(c) 2022 Intel Corporation. All rights reserved. +// +// Author: Bartosz Kokoszko + +#ifndef __SOF_IPC4_UP_DOWN_MIXER_H__ +#define __SOF_IPC4_UP_DOWN_MIXER_H__ + +#include +#include "base-config.h" + +/** + * \brief bit-field map which describes the location of each channel in the data stream buffer + */ +typedef uint32_t channel_map; + +enum up_down_mix_coeff_select { + /**< module will use default coeffs */ + DEFAULT_COEFFICIENTS = 0, + /**< custom coeffs are required */ + CUSTOM_COEFFICIENTS, + /**< module will use default coeffs */ + DEFAULT_COEFFICIENTS_WITH_CHANNEL_MAP, + /**< custom coeffs are required */ + CUSTOM_COEFFICIENTS_WITH_CHANNEL_MAP +}; + +#define UP_DOWN_MIX_COEFFS_LENGTH 8 +#define IPC4_UP_DOWN_MIXER_MODULE_OUTPUT_PINS_COUNT 1 + +struct ipc4_up_down_mixer_module_cfg { + struct ipc4_base_module_cfg base_cfg; + + /* + * Output Channel Configuration. + * Together with audio_fmt.channel_config determines module conversion ratio. + * Please note that UpDownMixer module does not support all conversions. 
+ */ + enum ipc4_channel_config out_channel_config; + + /**< Selects which coeffs are going to be used by UpDownMixer. */ + enum up_down_mix_coeff_select coefficients_select; + + /* + * Optional, when coefficients_select == #CUSTOM_COEFFICIENTS. For + * #CUSTOM_COEFFICIENTS expect coeffs array in quantity equal to + * #UP_DOWN_MIX_COEFFS_LENGTH. Values in this array should be + * in range <#MIN_COEFF_VALUE, #MAX_COEFF_VALUE>. + * + * Coefficients are indexed by enum ipc4_channel_index, i.e. in order: + * 1. Left + * 2. Center + * 3. Right + * 4. Left Surround (or Center Surround) + * 5. Right Surround + * 6. Left Side, 7. Right Side, 8. Low Frequency Effects + */ + int32_t coefficients[UP_DOWN_MIX_COEFFS_LENGTH]; + + /* + * Optional, When coefficients_select is set to + * #DEFAULT_COEFFICIENTS_WITH_CHANNEL_MAP or + * #CUSTOM_COEFFICIENTS_WITH_CHANNEL_MAP, then it will be used for + * channel decoding. + */ + channel_map channel_map; +} __packed __aligned(8); + +#endif /* __SOF_IPC4_UP_DOWN_MIXER_H__ */ diff --git a/src/include/sof/audio/coefficients/up_down_mixer/up_down_mixer.h b/src/include/sof/audio/coefficients/up_down_mixer/up_down_mixer.h new file mode 100644 index 000000000000..7344039b640d --- /dev/null +++ b/src/include/sof/audio/coefficients/up_down_mixer/up_down_mixer.h @@ -0,0 +1,106 @@ +// SPDX-License-Identifier: BSD-3-Clause +// +// Copyright(c) 2022 Intel Corporation. All rights reserved. 
+// +// Author: Bartosz Kokoszko +// Author: Adrian Bonislawski + +#include +#include +#include + +#if CONFIG_COMP_UP_DOWN_MIXER + +#define COMPUTE_COEFF_32BIT(counter, denominator) ((0x7fffffffULL * (counter)) / (denominator)) +#define COMPUTE_COEFF_16BIT(counter, denominator) ((0x7fffULL * (counter)) / (denominator)) + +const int32_t k_lo_ro_downmix32bit[UP_DOWN_MIX_COEFFS_LENGTH] = { + COMPUTE_COEFF_32BIT(1, 1), /* 1.0 - L */ + COMPUTE_COEFF_32BIT(707, 1000), /* 0.707 - Center */ + COMPUTE_COEFF_32BIT(1, 1), /* 1.0 - R */ + COMPUTE_COEFF_32BIT(707, 1000), /* 0.707 - Ls */ + COMPUTE_COEFF_32BIT(707, 1000), /* 0.707 - Rs */ + COMPUTE_COEFF_32BIT(100, 1000), /* 0.100 - LS */ + COMPUTE_COEFF_32BIT(100, 1000), /* 0.100 - RS */ + COMPUTE_COEFF_32BIT(000, 1000), /* 0.000 - LFE */ +}; + +const int32_t k_scaled_lo_ro_downmix32bit[UP_DOWN_MIX_COEFFS_LENGTH] = { + COMPUTE_COEFF_32BIT(414, 1000), /* 0.414 - L */ + COMPUTE_COEFF_32BIT(293, 1000), /* 0.293 - Center */ + COMPUTE_COEFF_32BIT(414, 1000), /* 0.414 - R */ + COMPUTE_COEFF_32BIT(293, 1000), /* 0.293 - Ls */ + COMPUTE_COEFF_32BIT(293, 1000), /* 0.293 - Rs */ + COMPUTE_COEFF_32BIT(100, 1000), /* 0.100 - LS */ + COMPUTE_COEFF_32BIT(100, 1000), /* 0.100 - RS */ + COMPUTE_COEFF_32BIT(000, 1000), /* 0.000 - LFE */ +}; + +const int32_t k_half_scaled_lo_ro_downmix32bit[UP_DOWN_MIX_COEFFS_LENGTH] = { + COMPUTE_COEFF_32BIT(586, 1000), /* 0.586 - L */ + COMPUTE_COEFF_32BIT(414, 1000), /* 0.414 - Center */ + COMPUTE_COEFF_32BIT(586, 1000), /* 0.586 - R */ + COMPUTE_COEFF_32BIT(414, 1000), /* 0.414 - Ls */ + COMPUTE_COEFF_32BIT(414, 1000), /* 0.414 - Rs */ + COMPUTE_COEFF_32BIT(100, 1000), /* 0.100 - LS */ + COMPUTE_COEFF_32BIT(100, 1000), /* 0.100 - RS */ + COMPUTE_COEFF_32BIT(000, 1000), /* 0.000 - LFE */ +}; + +const int32_t k_quatro_mono_scaled_lo_ro_downmix32bit[UP_DOWN_MIX_COEFFS_LENGTH] = { + COMPUTE_COEFF_32BIT(293, 1000), /* 0.293 - L */ + COMPUTE_COEFF_32BIT(207, 1000), /* 0.207 - Center */ + COMPUTE_COEFF_32BIT(293, 
1000), /* 0.293 - R */ + COMPUTE_COEFF_32BIT(207, 1000), /* 0.207 - Ls */ + COMPUTE_COEFF_32BIT(207, 1000), /* 0.207 - Rs */ + COMPUTE_COEFF_32BIT(100, 1000), /* 0.100 - LS */ + COMPUTE_COEFF_32BIT(100, 1000), /* 0.100 - RS */ + COMPUTE_COEFF_32BIT(000, 1000), /* 0.000 - LFE */ +}; + +const int32_t k_lo_ro_downmix16bit[UP_DOWN_MIX_COEFFS_LENGTH] = { + COMPUTE_COEFF_16BIT(1, 1), /* 1.0 - L */ + COMPUTE_COEFF_16BIT(707, 1000), /* 0.707 - Center */ + COMPUTE_COEFF_16BIT(1, 1), /* 1.0 - R */ + COMPUTE_COEFF_16BIT(707, 1000), /* 0.707 - Ls, Cs */ + COMPUTE_COEFF_16BIT(707, 1000), /* 0.707 - Rs */ + COMPUTE_COEFF_16BIT(100, 1000), /* 0.100 - LS */ + COMPUTE_COEFF_16BIT(100, 1000), /* 0.100 - RS */ + COMPUTE_COEFF_16BIT(000, 1000), /* 0.000 - LFE */ +}; + +const int32_t k_scaled_lo_ro_downmix16bit[UP_DOWN_MIX_COEFFS_LENGTH] = { + COMPUTE_COEFF_16BIT(414, 1000), /* 0.414 - L */ + COMPUTE_COEFF_16BIT(293, 1000), /* 0.293 - Center */ + COMPUTE_COEFF_16BIT(414, 1000), /* 0.414 - R */ + COMPUTE_COEFF_16BIT(293, 1000), /* 0.293 - Ls Cs */ + COMPUTE_COEFF_16BIT(293, 1000), /* 0.293 - Rs */ + COMPUTE_COEFF_16BIT(100, 1000), /* 0.100 - LS */ + COMPUTE_COEFF_16BIT(100, 1000), /* 0.100 - RS */ + COMPUTE_COEFF_16BIT(000, 1000), /* 0.000 - LFE */ +}; + +const int32_t k_half_scaled_lo_ro_downmix16bit[UP_DOWN_MIX_COEFFS_LENGTH] = { + COMPUTE_COEFF_16BIT(586, 1000), /* 0.586 - L */ + COMPUTE_COEFF_16BIT(414, 1000), /* 0.414 - Center */ + COMPUTE_COEFF_16BIT(586, 1000), /* 0.586 - R */ + COMPUTE_COEFF_16BIT(414, 1000), /* 0.414 - Ls Cs */ + COMPUTE_COEFF_16BIT(414, 1000), /* 0.414 - Rs */ + COMPUTE_COEFF_16BIT(100, 1000), /* 0.100 - LS */ + COMPUTE_COEFF_16BIT(100, 1000), /* 0.100 - RS */ + COMPUTE_COEFF_16BIT(000, 1000), /* 0.000 - LFE */ +}; + +const int32_t k_quatro_mono_scaled_lo_ro_downmix16bit[UP_DOWN_MIX_COEFFS_LENGTH] = { + COMPUTE_COEFF_16BIT(293, 1000), /* 0.293 - L */ + COMPUTE_COEFF_16BIT(207, 1000), /* 0.207 - Center */ + COMPUTE_COEFF_16BIT(293, 1000), /* 0.293 - R */ + 
COMPUTE_COEFF_16BIT(207, 1000), /* 0.207 - Ls */ + COMPUTE_COEFF_16BIT(207, 1000), /* 0.207 - Rs */ + COMPUTE_COEFF_16BIT(100, 1000), /* 0.100 - LS */ + COMPUTE_COEFF_16BIT(100, 1000), /* 0.100 - RS */ + COMPUTE_COEFF_16BIT(000, 1000), /* 0.000 - LFE */ +}; + +#endif /* CONFIG_COMP_UP_DOWN_MIXER */ + diff --git a/src/include/sof/audio/up_down_mixer/up_down_mixer.h b/src/include/sof/audio/up_down_mixer/up_down_mixer.h new file mode 100644 index 000000000000..fb98ff79078b --- /dev/null +++ b/src/include/sof/audio/up_down_mixer/up_down_mixer.h @@ -0,0 +1,500 @@ +// SPDX-License-Identifier: BSD-3-Clause +// +// Copyright(c) 2022 Intel Corporation. All rights reserved. +// +// Author: Bartosz Kokoszko +// Author: Adrian Bonislawski + +#ifndef __SOF_AUDIO_UP_DOWN_MIXER_H__ +#define __SOF_AUDIO_UP_DOWN_MIXER_H__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/** This type is introduced for better readability. */ +typedef const int32_t *downmix_coefficients; + +/** forward declaration */ +struct up_down_mixer_data; + +/** Function's pointer to up/down-mix routines. */ +typedef void (*up_down_mixer_routine)(struct up_down_mixer_data *cd, + const uint8_t * const in_data, + const uint32_t in_size, + uint8_t * const out_data); + +/** + * Simple 'macro-style' method to create proper channel map from a valid + * channel configuration. 
+ */ +static inline channel_map create_channel_map(enum ipc4_channel_config channel_config) +{ + switch (channel_config) { + case IPC4_CHANNEL_CONFIG_MONO: + return (0xFFFFFFF0 | CHANNEL_CENTER); + case IPC4_CHANNEL_CONFIG_STEREO: + return (0xFFFFFF00 | CHANNEL_LEFT | (CHANNEL_RIGHT << 4)); + case IPC4_CHANNEL_CONFIG_2_POINT_1: + return (0xFFFFF000 | CHANNEL_LEFT | (CHANNEL_RIGHT << 4) | (CHANNEL_LFE << 8)); + case IPC4_CHANNEL_CONFIG_3_POINT_0: + return (0xFFFFF000 | CHANNEL_LEFT | (CHANNEL_CENTER << 4) | (CHANNEL_RIGHT << 8)); + case IPC4_CHANNEL_CONFIG_3_POINT_1: + return (0xFFFF0000 | CHANNEL_LEFT | (CHANNEL_CENTER << 4) | (CHANNEL_RIGHT << 8) + | (CHANNEL_LFE << 12)); + case IPC4_CHANNEL_CONFIG_QUATRO: + return (0xFFFF0000 | CHANNEL_LEFT | (CHANNEL_RIGHT << 4) + | (CHANNEL_LEFT_SURROUND << 8) | (CHANNEL_RIGHT_SURROUND << 12)); + case IPC4_CHANNEL_CONFIG_4_POINT_0: + return (0xFFFF0000 | CHANNEL_LEFT | (CHANNEL_CENTER << 4) | (CHANNEL_RIGHT << 8) + | (CHANNEL_CENTER_SURROUND << 12)); + case IPC4_CHANNEL_CONFIG_5_POINT_0: + return (0xFFF00000 | CHANNEL_LEFT | (CHANNEL_CENTER << 4) | (CHANNEL_RIGHT << 8) + | (CHANNEL_LEFT_SURROUND << 12) + | (CHANNEL_RIGHT_SURROUND << 16)); + case IPC4_CHANNEL_CONFIG_5_POINT_1: + return (0xFF000000 | CHANNEL_LEFT + | (CHANNEL_CENTER << 4) + | (CHANNEL_RIGHT << 8) + | (CHANNEL_LEFT_SURROUND << 12) + | (CHANNEL_RIGHT_SURROUND << 16) + | (CHANNEL_LFE << 20)); + case IPC4_CHANNEL_CONFIG_7_POINT_1: + return (CHANNEL_LEFT | (CHANNEL_CENTER << 4) + | (CHANNEL_RIGHT << 8) + | (CHANNEL_LEFT_SURROUND << 12) + | (CHANNEL_RIGHT_SURROUND << 16) + | (CHANNEL_LFE << 20) + | (CHANNEL_LEFT_SIDE << 24) + | (CHANNEL_RIGHT_SIDE << 28)); + case IPC4_CHANNEL_CONFIG_DUAL_MONO: + return (0xFFFFFF00 | CHANNEL_LEFT | (CHANNEL_LEFT << 4)); + case IPC4_CHANNEL_CONFIG_I2S_DUAL_STEREO_0: + return (0xFFFFFF00 | CHANNEL_LEFT | (CHANNEL_RIGHT << 4)); + case IPC4_CHANNEL_CONFIG_I2S_DUAL_STEREO_1: + return (0xFFFF00FF | (CHANNEL_LEFT << 8) | (CHANNEL_RIGHT << 
12)); + default: + return 0xFFFFFFFF; + } +} + +static inline uint8_t get_channel_location(const channel_map map, + const enum ipc4_channel_index channel) +{ + uint8_t offset = 0xF; + uint8_t i; + + /* Search through all 4 bits of each byte in the integer for the channel. */ + for (i = 0; i < 8; i++) { + if (((map >> (i * 4)) & 0xF) == (uint8_t)channel) { + offset = i; + break; + } + } + + return offset; +} + +static inline enum ipc4_channel_index get_channel_index(const channel_map map, + const uint8_t location) +{ + return (enum ipc4_channel_index)((map >> (location * 4)) & 0xF); +} + +/** + * \brief up_down_mixer component private data. + */ +struct up_down_mixer_data { + struct ipc4_base_module_cfg base; + /** Number of channels in the input buffer. */ + size_t in_channel_no; + + /** Channel map in the input buffer. */ + channel_map in_channel_map; + + /** Channel configuration in the input buffer. */ + enum ipc4_channel_config in_channel_config; + channel_map out_channel_map; + + /** Function pointer to up/down-mix routine. */ + up_down_mixer_routine mix_routine; + + /** Downmix coefficients. */ + downmix_coefficients downmix_coefficients; + + struct ipc4_audio_format out_fmt[IPC4_UP_DOWN_MIXER_MODULE_OUTPUT_PINS_COUNT]; + + const int32_t k_lo_ro_downmix32bit[UP_DOWN_MIX_COEFFS_LENGTH]; + const int32_t k_scaled_lo_ro_downmix32bit[UP_DOWN_MIX_COEFFS_LENGTH]; + const int32_t k_half_scaled_lo_ro_downmix32bit[UP_DOWN_MIX_COEFFS_LENGTH]; + const int32_t k_quatro_mono_scaled_lo_ro_downmix32bit[UP_DOWN_MIX_COEFFS_LENGTH]; + const int32_t k_lo_ro_downmix16bit[UP_DOWN_MIX_COEFFS_LENGTH]; + const int32_t k_scaled_lo_ro_downmix16bit[UP_DOWN_MIX_COEFFS_LENGTH]; + const int32_t k_half_scaled_lo_ro_downmix16bit[UP_DOWN_MIX_COEFFS_LENGTH]; + const int32_t k_quatro_mono_scaled_lo_ro_downmix16bit[UP_DOWN_MIX_COEFFS_LENGTH]; + + /** In/out internal buffers */ + int32_t *buf_in; + int32_t *buf_out; +}; + +/** + * \brief 32 bit upmixer (mono -> 5_1). 
+ *
+ * \param[in] cd Component private data.
+ * \param[in] in_data Input buffer.
+ * \param[in] in_size Input buffer size.
+ * \param[out] out_data Output buffer.
+ */
+void upmix32bit_1_to_5_1(struct up_down_mixer_data *cd, const uint8_t * const in_data,
+			 const uint32_t in_size, uint8_t * const out_data);
+
+/**
+ * \brief 16 bit upmixer (mono -> 5_1).
+ *
+ * \param[in] cd Component private data.
+ * \param[in] in_data Input buffer.
+ * \param[in] in_size Input buffer size.
+ * \param[out] out_data Output buffer.
+ */
+void upmix16bit_1_to_5_1(struct up_down_mixer_data *cd, const uint8_t * const in_data,
+			 const uint32_t in_size, uint8_t * const out_data);
+
+/**
+ * \brief 32 bit upmixer (2_0 -> 5_1).
+ *
+ * \param[in] cd Component private data.
+ * \param[in] in_data Input buffer.
+ * \param[in] in_size Input buffer size.
+ * \param[out] out_data Output buffer.
+ */
+void upmix32bit_2_0_to_5_1(struct up_down_mixer_data *cd, const uint8_t * const in_data,
+			   const uint32_t in_size, uint8_t * const out_data);
+
+/**
+ * \brief 16 bit upmixer (2_0 -> 5_1).
+ *
+ * \param[in] cd Component private data.
+ * \param[in] in_data Input buffer.
+ * \param[in] in_size Input buffer size.
+ * \param[out] out_data Output buffer.
+ */
+void upmix16bit_2_0_to_5_1(struct up_down_mixer_data *cd, const uint8_t * const in_data,
+			   const uint32_t in_size, uint8_t * const out_data);
+
+/**
+ * \brief 32 bit upmixer (2_0 -> 7_1).
+ *
+ * \param[in] cd Component private data.
+ * \param[in] in_data Input buffer.
+ * \param[in] in_size Input buffer size.
+ * \param[out] out_data Output buffer.
+ */
+void upmix32bit_2_0_to_7_1(struct up_down_mixer_data *cd, const uint8_t * const in_data,
+			   const uint32_t in_size, uint8_t * const out_data);
+
+/**
+ * \brief 32 bit shift copier for mono streams.
+ * Copy the 32 MSB input mono stream (left, right) to 32 MSB stereo one.
+ *
+ * \param[in] cd Component private data.
+ * \param[in] in_data Input buffer.
+ * \param[in] in_size Input buffer size.
+ * \param[out] out_data Output buffer.
+ */
+void shiftcopy32bit_mono(struct up_down_mixer_data *cd, const uint8_t * const in_data,
+			 const uint32_t in_size, uint8_t * const out_data);
+
+/**
+ * \brief 32 bit shift copier for stereo.
+ * Copy the 32 MSB input stereo stream (left, right) to 32 MSB one.
+ *
+ * \param[in] cd Component private data.
+ * \param[in] in_data Input buffer.
+ * \param[in] in_size Input buffer size.
+ * \param[out] out_data Output buffer.
+ */
+void shiftcopy32bit_stereo(struct up_down_mixer_data *cd, const uint8_t * const in_data,
+			   const uint32_t in_size, uint8_t * const out_data);
+
+/**
+ * \brief 32 bit downmixer specialized for the 2.1.
+ * \note implementation is based on Downmix32bit
+ *
+ * \param[in] cd Component private data.
+ * \param[in] in_data Input buffer.
+ * \param[in] in_size Input buffer size.
+ * \param[out] out_data Output buffer.
+ */
+void downmix32bit_2_1(struct up_down_mixer_data *cd, const uint8_t * const in_data,
+		      const uint32_t in_size, uint8_t * const out_data);
+
+/**
+ * \brief 32 bit downmixer specialized for the 3.0.
+ * \note implementation is based on Downmix32bit
+ *
+ * \param[in] cd Component private data.
+ * \param[in] in_data Input buffer.
+ * \param[in] in_size Input buffer size.
+ * \param[out] out_data Output buffer.
+ */
+void downmix32bit_3_0(struct up_down_mixer_data *cd, const uint8_t * const in_data,
+		      const uint32_t in_size, uint8_t * const out_data);
+
+/**
+ * \brief 32 bit downmixer specialized for the 3.1.
+ * \note implementation is based on Downmix32bit
+ *
+ * \param[in] cd Component private data.
+ * \param[in] in_data Input buffer.
+ * \param[in] in_size Input buffer size.
+ * \param[out] out_data Output buffer.
+ */
+void downmix32bit_3_1(struct up_down_mixer_data *cd, const uint8_t * const in_data,
+		      const uint32_t in_size, uint8_t * const out_data);
+
+/**
+ * \brief 32 bit downmixer. This function is highly power consuming
+ * \note TODO confirm the valid sample width (24 bit data in 32 bit containers?).
+ *
+ * \param[in] cd Component private data.
+ * \param[in] in_data Input buffer.
+ * \param[in] in_size Input buffer size.
+ * \param[out] out_data Output buffer.
+ */
+void downmix32bit(struct up_down_mixer_data *cd, const uint8_t * const in_data,
+		  const uint32_t in_size, uint8_t * const out_data);
+
+/**
+ * \brief 32 bit downmixer specialized for the 4.0.
+ * \note implementation is based on Downmix32bit
+ *
+ * \param[in] cd Component private data.
+ * \param[in] in_data Input buffer.
+ * \param[in] in_size Input buffer size.
+ * \param[out] out_data Output buffer.
+ */
+void downmix32bit_4_0(struct up_down_mixer_data *cd, const uint8_t * const in_data,
+		      const uint32_t in_size, uint8_t * const out_data);
+
+/**
+ * \brief 32 bit downmixer specialized for the 5.0 to mono downmixing.
+ * \note implementation is based on Downmix32bit
+ *
+ * \param[in] cd Component private data.
+ * \param[in] in_data Input buffer.
+ * \param[in] in_size Input buffer size.
+ * \param[out] out_data Output buffer.
+ */
+void downmix32bit_5_0_mono(struct up_down_mixer_data *cd, const uint8_t * const in_data,
+			   const uint32_t in_size, uint8_t * const out_data);
+
+/**
+ * \brief 32 bit downmixer specialized for the 5.1.
+ * \note implementation is based on Downmix32bit
+ *
+ * \param[in] cd Component private data.
+ * \param[in] in_data Input buffer.
+ * \param[in] in_size Input buffer size.
+ * \param[out] out_data Output buffer.
+ */
+void downmix32bit_5_1(struct up_down_mixer_data *cd, const uint8_t * const in_data,
+		      const uint32_t in_size, uint8_t * const out_data);
+
+/**
+ * \brief 32 bit downmixer specialized for the 7.1.
+ * \note implementation is based on Downmix32bit
+ *
+ * \param[in] cd Component private data.
+ * \param[in] in_data Input buffer.
+ * \param[in] in_size Input buffer size.
+ * \param[out] out_data Output buffer.
+ */
+void downmix32bit_7_1(struct up_down_mixer_data *cd, const uint8_t * const in_data,
+		      const uint32_t in_size, uint8_t * const out_data);
+
+/**
+ * \brief 16 bit shift copier for mono.
+ * Copy the 16 bit input mono stream (left, right) to 32 MSB stereo one.
+ *
+ * \param[in] cd Component private data.
+ * \param[in] in_data Input buffer.
+ * \param[in] in_size Input buffer size.
+ * \param[out] out_data Output buffer.
+ */
+void shiftcopy16bit_mono(struct up_down_mixer_data *cd, const uint8_t * const in_data,
+			 const uint32_t in_size, uint8_t * const out_data);
+
+/**
+ * \brief 16 bit shift copier for stereo.
+ * Copy the 16 bit input stereo stream (left, right) to 32 MSB one.
+ *
+ * \param[in] cd Component private data.
+ * \param[in] in_data Input buffer.
+ * \param[in] in_size Input buffer size.
+ * \param[out] out_data Output buffer.
+ */
+void shiftcopy16bit_stereo(struct up_down_mixer_data *cd, const uint8_t * const in_data,
+			   const uint32_t in_size, uint8_t * const out_data);
+
+/**
+ * \brief 16 bit downmixer. This function is highly power consuming
+ *
+ * \param[in] cd Component private data.
+ * \param[in] in_data Input buffer.
+ * \param[in] in_size Input buffer size.
+ * \param[out] out_data Output buffer.
+ */
+void downmix16bit(struct up_down_mixer_data *cd, const uint8_t * const in_data,
+		  const uint32_t in_size, uint8_t * const out_data);
+
+/**
+ * \brief 16 bit downmixer specialized for the 5.1.
+ *
+ * \param[in] cd Component private data.
+ * \param[in] in_data Input buffer.
+ * \param[in] in_size Input buffer size.
+ * \param[out] out_data Output buffer.
+ */
+void downmix16bit_5_1(struct up_down_mixer_data *cd, const uint8_t * const in_data,
+		      const uint32_t in_size, uint8_t * const out_data);
+
+/**
+ * \brief 16 bit downmixer 4 channels to mono. This function is highly power consuming
+ *
+ * \param[in] cd Component private data.
+ * \param[in] in_data Input buffer.
+ * \param[in] in_size Input buffer size.
+ * \param[out] out_data Output buffer.
+ */
+void downmix16bit_4ch_mono(struct up_down_mixer_data *cd, const uint8_t * const in_data,
+			   const uint32_t in_size, uint8_t * const out_data);
+
+/**
+ * \brief 32 bit downmixer specialized for the 2.0.
+ *
+ * \param[in] cd Component private data.
+ * \param[in] in_data Input buffer.
+ * \param[in] in_size Input buffer size.
+ * \param[out] out_data Output buffer.
+ */
+void downmix32bit_stereo(struct up_down_mixer_data *cd, const uint8_t * const in_data,
+			 const uint32_t in_size, uint8_t * const out_data);
+
+/**
+ * \brief 16 bit downmix from stereo to mono.
+ * \note needs to be optimized!!!
+ *
+ * \param[in] cd Component private data.
+ * \param[in] in_data Input buffer.
+ * \param[in] in_size Input buffer size.
+ * \param[out] out_data Output buffer.
+ */
+void downmix16bit_stereo(struct up_down_mixer_data *cd, const uint8_t * const in_data,
+			 const uint32_t in_size, uint8_t * const out_data);
+
+/**
+ * \brief 32 bit downmixer specialized for the 3.1 to mono downmixing.
+ * \note implementation is based on Downmix32bit
+ *
+ * \param[in] cd Component private data.
+ * \param[in] in_data Input buffer.
+ * \param[in] in_size Input buffer size.
+ * \param[out] out_data Output buffer.
+ */
+void downmix32bit_3_1_mono(struct up_down_mixer_data *cd, const uint8_t * const in_data,
+			   const uint32_t in_size, uint8_t * const out_data);
+
+/**
+ * \brief 32 bit downmixer specialized for the 4.0 to mono downmixing.
+ * \note implementation is based on Downmix32bit
+ *
+ * \param[in] cd Component private data.
+ * \param[in] in_data Input buffer.
+ * \param[in] in_size Input buffer size.
+ * \param[out] out_data Output buffer.
+ */
+void downmix32bit_4_0_mono(struct up_down_mixer_data *cd, const uint8_t * const in_data,
+			   const uint32_t in_size, uint8_t * const out_data);
+
+/**
+ * \brief 32 bit downmixer specialized for the Quatro to mono downmixing.
+ * \note implementation is based on Downmix32bit
+ *
+ * \param[in] cd Component private data.
+ * \param[in] in_data Input buffer.
+ * \param[in] in_size Input buffer size.
+ * \param[out] out_data Output buffer.
+ */
+void downmix32bit_quatro_mono(struct up_down_mixer_data *cd, const uint8_t * const in_data,
+			      const uint32_t in_size, uint8_t * const out_data);
+
+/**
+ * \brief 32 bit downmixer specialized for the 5.1 to mono downmixing.
+ * \note implementation is based on Downmix32bit
+ *
+ * \param[in] cd Component private data.
+ * \param[in] in_data Input buffer.
+ * \param[in] in_size Input buffer size.
+ * \param[out] out_data Output buffer.
+ */
+void downmix32bit_5_1_mono(struct up_down_mixer_data *cd, const uint8_t * const in_data,
+			   const uint32_t in_size, uint8_t * const out_data);
+
+/**
+ * \brief 32 bit downmixer specialized for the 7.1 to mono downmixing.
+ * \note implementation is based on Downmix32bit
+ *
+ * \param[in] cd Component private data.
+ * \param[in] in_data Input buffer.
+ * \param[in] in_size Input buffer size.
+ * \param[out] out_data Output buffer.
+ */
+void downmix32bit_7_1_mono(struct up_down_mixer_data *cd, const uint8_t * const in_data,
+			   const uint32_t in_size, uint8_t * const out_data);
+
+/**
+ * \brief 32 bit downmixer specialized for the 7.1 to 5_1 downmixing.
+ * \note implementation is based on Downmix32bit
+ *
+ * \param[in] cd Component private data.
+ * \param[in] in_data Input buffer.
+ * \param[in] in_size Input buffer size.
+ * \param[out] out_data Output buffer.
+ */
+void downmix32bit_7_1_to_5_1(struct up_down_mixer_data *cd, const uint8_t * const in_data,
+			     const uint32_t in_size, uint8_t * const out_data);
+
+/**
+ * \brief 32 bit upmixer specialized for the 4.0 to 5_1
+ * \note implementation is based on Downmix32bit
+ *
+ * \param[in] cd Component private data.
+ * \param[in] in_data Input buffer.
+ * \param[in] in_size Input buffer size.
+ * \param[out] out_data Output buffer.
+ */
+void upmix32bit_4_0_to_5_1(struct up_down_mixer_data *cd, const uint8_t * const in_data,
+			   const uint32_t in_size, uint8_t * const out_data);
+
+/**
+ * \brief 32 bit upmixer specialized for the Quatro to 5_1
+ * \note implementation is based on Downmix32bit
+ *
+ * \param[in] cd Component private data.
+ * \param[in] in_data Input buffer.
+ * \param[in] in_size Input buffer size.
+ * \param[out] out_data Output buffer.
+ */
+void upmix32bit_quatro_to_5_1(struct up_down_mixer_data *cd, const uint8_t * const in_data,
+			      const uint32_t in_size, uint8_t * const out_data);
+
+#endif /* __SOF_AUDIO_UP_DOWN_MIXER_H__ */